blob: d31bc70337ab4b6433bb6321a0118b57c9ef6be6 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Senthil Kumaran7c9719c2011-07-28 22:32:49 +080034__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
Guido van Rossum98297ee2007-11-06 21:34:58 +000036__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
41import sys
42import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020043import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000044import shutil
45import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000046import time
47import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000048import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000049import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000050
51try:
52 import grp, pwd
53except ImportError:
54 grp = pwd = None
55
# os.symlink on Windows prior to 6.0 raises NotImplementedError.  On newer
# versions a WindowsError (1314) is raised if the caller does not hold the
# SeCreateSymbolicLinkPrivilege privilege, so it is added when available.
try:
    symlink_exception = (AttributeError, NotImplementedError, WindowsError)
except NameError:
    # WindowsError is not defined on this platform.
    symlink_exception = (AttributeError, NotImplementedError)
64
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000065# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
Georg Brandl1a3284e2007-12-02 09:40:06 +000068from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000069
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000070#---------------------------------------------------------
71# tar constants
72#---------------------------------------------------------
Lars Gustäbelb506dc32007-08-07 18:36:16 +000073NUL = b"\0" # the null character
Guido van Rossumd8faa362007-04-27 19:54:29 +000074BLOCKSIZE = 512 # length of processing blocks
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000075RECORDSIZE = BLOCKSIZE * 20 # length of records
Lars Gustäbelb506dc32007-08-07 18:36:16 +000076GNU_MAGIC = b"ustar \0" # magic gnu tar string
77POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000078
Guido van Rossumd8faa362007-04-27 19:54:29 +000079LENGTH_NAME = 100 # maximum length of a filename
80LENGTH_LINK = 100 # maximum length of a linkname
81LENGTH_PREFIX = 155 # maximum length of the prefix field
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000082
Lars Gustäbelb506dc32007-08-07 18:36:16 +000083REGTYPE = b"0" # regular file
84AREGTYPE = b"\0" # regular file
85LNKTYPE = b"1" # link (inside tarfile)
86SYMTYPE = b"2" # symbolic link
87CHRTYPE = b"3" # character special device
88BLKTYPE = b"4" # block special device
89DIRTYPE = b"5" # directory
90FIFOTYPE = b"6" # fifo special device
91CONTTYPE = b"7" # contiguous file
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000092
Lars Gustäbelb506dc32007-08-07 18:36:16 +000093GNUTYPE_LONGNAME = b"L" # GNU tar longname
94GNUTYPE_LONGLINK = b"K" # GNU tar longlink
95GNUTYPE_SPARSE = b"S" # GNU tar sparse file
Guido van Rossumd8faa362007-04-27 19:54:29 +000096
Lars Gustäbelb506dc32007-08-07 18:36:16 +000097XHDTYPE = b"x" # POSIX.1-2001 extended header
98XGLTYPE = b"g" # POSIX.1-2001 global header
99SOLARIS_XHDTYPE = b"X" # Solaris extended header
Guido van Rossumd8faa362007-04-27 19:54:29 +0000100
101USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
102GNU_FORMAT = 1 # GNU tar format
103PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
104DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000105
106#---------------------------------------------------------
107# tarfile constants
108#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000109# File types that tarfile supports:
110SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
111 SYMTYPE, DIRTYPE, FIFOTYPE,
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000112 CONTTYPE, CHRTYPE, BLKTYPE,
113 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
114 GNUTYPE_SPARSE)
115
Guido van Rossumd8faa362007-04-27 19:54:29 +0000116# File types that will be treated as a regular file.
117REGULAR_TYPES = (REGTYPE, AREGTYPE,
118 CONTTYPE, GNUTYPE_SPARSE)
119
120# File types that are part of the GNU tar format.
121GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
122 GNUTYPE_SPARSE)
123
124# Fields from a pax header that override a TarInfo attribute.
125PAX_FIELDS = ("path", "linkpath", "size", "mtime",
126 "uid", "gid", "uname", "gname")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000127
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000128# Fields from a pax header that are affected by hdrcharset.
129PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
130
Guido van Rossume7ba4952007-06-06 23:52:48 +0000131# Fields in a pax header that are numbers, all other fields
132# are treated as strings.
133PAX_NUMBER_FIELDS = {
134 "atime": float,
135 "ctime": float,
136 "mtime": float,
137 "uid": int,
138 "gid": int,
139 "size": int
140}
141
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000142#---------------------------------------------------------
143# Bits used in the mode field, values in octal.
144#---------------------------------------------------------
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000145S_IFLNK = 0o120000 # symbolic link
146S_IFREG = 0o100000 # regular file
147S_IFBLK = 0o060000 # block device
148S_IFDIR = 0o040000 # directory
149S_IFCHR = 0o020000 # character device
150S_IFIFO = 0o010000 # fifo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000151
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000152TSUID = 0o4000 # set UID on execution
153TSGID = 0o2000 # set GID on execution
154TSVTX = 0o1000 # reserved
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000156TUREAD = 0o400 # read by owner
157TUWRITE = 0o200 # write by owner
158TUEXEC = 0o100 # execute/search by owner
159TGREAD = 0o040 # read by group
160TGWRITE = 0o020 # write by group
161TGEXEC = 0o010 # execute/search by group
162TOREAD = 0o004 # read by other
163TOWRITE = 0o002 # write by other
164TOEXEC = 0o001 # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000165
166#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000167# initialization
168#---------------------------------------------------------
# Default encoding for member names: UTF-8 on Windows-family platforms,
# the filesystem encoding everywhere else.
ENCODING = "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000173
174#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000175# Some useful functions
176#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000177
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes when it is too long
    and padded on the right with NUL bytes when it is too short.
    """
    encoded = s.encode(encoding, errors)
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000183
def nts(s, encoding, errors):
    """Decode a bytes field, ignoring everything from the first NUL on.

    If s contains no NUL byte the whole field is decoded.
    """
    text, _, _ = s.partition(b"\0")
    return text.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191
def nti(s):
    """Convert a number field to a python number.

    Two encodings are recognised (see itn()): a leading 0o200 or 0o377
    byte introduces a big-endian base-256 value (0o377 marks a negative
    number); anything else is parsed as a NUL-terminated octal string.
    Raises InvalidHeaderError if the octal form cannot be parsed.
    """
    marker = s[0]
    if marker not in (0o200, 0o377):
        # Plain octal digits; an empty field counts as zero.
        try:
            return int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: every byte after the marker is one big-endian digit.
    value = int.from_bytes(s[1:], "big")
    if marker == 0o377:
        value = -(256 ** (len(s) - 1) - value)
    return value
210
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

    POSIX 1003.1-1988 requires numbers to be encoded as a string of
    octal digits followed by a null-byte, which allows values up to
    (8**(digits-1))-1.  For GNU_FORMAT, values outside that range are
    stored with a leading 0o200 (positive) or 0o377 (negative) byte
    followed by digits-1 bytes of big-endian base-256 data.

    Raises ValueError if n fits neither encoding.
    """
    width = digits - 1

    # Standard octal representation.
    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    # Only GNU tar knows the base-256 extension, and it has limits too.
    if format != GNU_FORMAT or not (-256 ** width <= n < 256 ** width):
        raise ValueError("overflow in number field")

    if n >= 0:
        field = bytearray([0o200])
    else:
        field = bytearray([0o377])
        # Shift into the non-negative range; only the low bytes are kept.
        n = 256 ** digits + n

    # Emit the least significant byte first, always inserting right after
    # the marker so the final order is big-endian.
    for _ in range(width):
        field.insert(1, n & 0o377)
        n >>= 8
    return field
238
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the header are summed up, except for the 8-byte chksum
    field, which is counted as if it were filled with spaces (8 * 0x20 ==
    256).  According to the GNU tar sources, some tars (Sun and NeXT)
    compute the checksum with signed chars, which gives a different
    result when bytes with the high bit set are present, so both variants
    are returned.
    """
    as_unsigned = struct.unpack_from("148B8x356B", buf)
    as_signed = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000251
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None the whole remaining content of src is copied via
    shutil.copyfileobj().  Otherwise the data is moved in 16 KiB pieces
    and IOError is raised as soon as src delivers fewer bytes than
    requested.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def _transfer(count):
        # Move exactly count bytes or fail.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)
    for _ in range(full_chunks):
        _transfer(chunk_size)
    if tail != 0:
        _transfer(tail)
    return
276
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    # Imported lazily so the warnings machinery is only loaded when this
    # deprecated alias is actually called.
    from warnings import warn
    warn("deprecated in favor of stat.filemode", DeprecationWarning,
         stacklevel=2)
    return stat.filemode(mode)
283
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000284
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000318
319#---------------------------
320# internal stream interface
321#---------------------------
322class _LowLevelFile:
323 """Low-level file object. Supports reading and writing.
324 It is used instead of a regular file object for streaming
325 access.
326 """
327
328 def __init__(self, name, mode):
329 mode = {
330 "r": os.O_RDONLY,
331 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
332 }[mode]
333 if hasattr(os, "O_BINARY"):
334 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000335 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000336
337 def close(self):
338 os.close(self.fd)
339
340 def read(self, size):
341 return os.read(self.fd, size)
342
343 def write(self, s):
344 os.write(self.fd, s)
345
346class _Stream:
347 """Class that serves as an adapter between TarFile and
348 a stream-like object. The stream-like object only
349 needs to have a read() or write() method and is accessed
350 blockwise. Use of gzip or bzip2 compression is possible.
351 A stream-like object could be for example: sys.stdin,
352 sys.stdout, a socket, a tape device etc.
353
354 _Stream is intended to be used only internally.
355 """
356
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000357 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358 """Construct a _Stream object.
359 """
360 self._extfileobj = True
361 if fileobj is None:
362 fileobj = _LowLevelFile(name, mode)
363 self._extfileobj = False
364
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000365 if comptype == '*':
366 # Enable transparent compression detection for the
367 # stream interface
368 fileobj = _StreamProxy(fileobj)
369 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000370
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000371 self.name = name or ""
372 self.mode = mode
373 self.comptype = comptype
374 self.fileobj = fileobj
375 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000376 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000377 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000378 self.closed = False
379
Antoine Pitrou605c2932010-09-23 20:15:14 +0000380 try:
381 if comptype == "gz":
382 try:
383 import zlib
384 except ImportError:
385 raise CompressionError("zlib module is not available")
386 self.zlib = zlib
387 self.crc = zlib.crc32(b"")
388 if mode == "r":
389 self._init_read_gz()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100390 self.exception = zlib.error
Antoine Pitrou605c2932010-09-23 20:15:14 +0000391 else:
392 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000393
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100394 elif comptype == "bz2":
Antoine Pitrou605c2932010-09-23 20:15:14 +0000395 try:
396 import bz2
397 except ImportError:
398 raise CompressionError("bz2 module is not available")
399 if mode == "r":
400 self.dbuf = b""
401 self.cmp = bz2.BZ2Decompressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100402 self.exception = IOError
Antoine Pitrou605c2932010-09-23 20:15:14 +0000403 else:
404 self.cmp = bz2.BZ2Compressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100405
406 elif comptype == "xz":
407 try:
408 import lzma
409 except ImportError:
410 raise CompressionError("lzma module is not available")
411 if mode == "r":
412 self.dbuf = b""
413 self.cmp = lzma.LZMADecompressor()
414 self.exception = lzma.LZMAError
415 else:
416 self.cmp = lzma.LZMACompressor()
417
418 elif comptype != "tar":
419 raise CompressionError("unknown compression type %r" % comptype)
420
Antoine Pitrou605c2932010-09-23 20:15:14 +0000421 except:
422 if not self._extfileobj:
423 self.fileobj.close()
424 self.closed = True
425 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000428 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000429 self.close()
430
431 def _init_write_gz(self):
432 """Initialize for writing with gzip compression.
433 """
434 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
435 -self.zlib.MAX_WBITS,
436 self.zlib.DEF_MEM_LEVEL,
437 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000438 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000439 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000440 if self.name.endswith(".gz"):
441 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000442 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
443 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000444
445 def write(self, s):
446 """Write string s to the stream.
447 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000448 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000449 self.crc = self.zlib.crc32(s, self.crc)
450 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000451 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000452 s = self.cmp.compress(s)
453 self.__write(s)
454
455 def __write(self, s):
456 """Write string s to the stream if a whole new block
457 is ready to be written.
458 """
459 self.buf += s
460 while len(self.buf) > self.bufsize:
461 self.fileobj.write(self.buf[:self.bufsize])
462 self.buf = self.buf[self.bufsize:]
463
464 def close(self):
465 """Close the _Stream object. No operation should be
466 done on it afterwards.
467 """
468 if self.closed:
469 return
470
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000471 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000472 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000473
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000474 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000475 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000476 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000477 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000478 # The native zlib crc is an unsigned 32-bit integer, but
479 # the Python wrapper implicitly casts that to a signed C
480 # long. So, on a 32-bit box self.crc may "look negative",
481 # while the same crc on a 64-bit box may "look positive".
482 # To avoid irksome warnings from the `struct` module, force
483 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000484 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
485 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000486
487 if not self._extfileobj:
488 self.fileobj.close()
489
490 self.closed = True
491
492 def _init_read_gz(self):
493 """Initialize for reading a gzip compressed fileobj.
494 """
495 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000496 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000497
498 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000499 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000500 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000501 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000502 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503
504 flag = ord(self.__read(1))
505 self.__read(6)
506
507 if flag & 4:
508 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
509 self.read(xlen)
510 if flag & 8:
511 while True:
512 s = self.__read(1)
513 if not s or s == NUL:
514 break
515 if flag & 16:
516 while True:
517 s = self.__read(1)
518 if not s or s == NUL:
519 break
520 if flag & 2:
521 self.__read(2)
522
523 def tell(self):
524 """Return the stream's file pointer position.
525 """
526 return self.pos
527
528 def seek(self, pos=0):
529 """Set the stream's file pointer to pos. Negative seeking
530 is forbidden.
531 """
532 if pos - self.pos >= 0:
533 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000534 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000535 self.read(self.bufsize)
536 self.read(remainder)
537 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000538 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000539 return self.pos
540
541 def read(self, size=None):
542 """Return the next size number of bytes from the stream.
543 If size is not defined, return all bytes of the stream
544 up to EOF.
545 """
546 if size is None:
547 t = []
548 while True:
549 buf = self._read(self.bufsize)
550 if not buf:
551 break
552 t.append(buf)
553 buf = "".join(t)
554 else:
555 buf = self._read(size)
556 self.pos += len(buf)
557 return buf
558
559 def _read(self, size):
560 """Return size bytes from the stream.
561 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000562 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000563 return self.__read(size)
564
565 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000566 while c < size:
567 buf = self.__read(self.bufsize)
568 if not buf:
569 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000570 try:
571 buf = self.cmp.decompress(buf)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100572 except self.exception:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000573 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000574 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000575 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000576 buf = self.dbuf[:size]
577 self.dbuf = self.dbuf[size:]
578 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000579
580 def __read(self, size):
581 """Return size bytes from stream. If internal buffer is empty,
582 read another block from the stream.
583 """
584 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000585 while c < size:
586 buf = self.fileobj.read(self.bufsize)
587 if not buf:
588 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000589 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000590 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000591 buf = self.buf[:size]
592 self.buf = self.buf[size:]
593 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000594# class _Stream
595
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000596class _StreamProxy(object):
597 """Small proxy class that enables transparent compression
598 detection for the Stream interface (mode 'r|*').
599 """
600
601 def __init__(self, fileobj):
602 self.fileobj = fileobj
603 self.buf = self.fileobj.read(BLOCKSIZE)
604
605 def read(self, size):
606 self.read = self.fileobj.read
607 return self.buf
608
609 def getcomptype(self):
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100610 if self.buf.startswith(b"\x1f\x8b\x08"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000611 return "gz"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100612 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000613 return "bz2"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100614 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
615 return "xz"
616 else:
617 return "tar"
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000618
619 def close(self):
620 self.fileobj.close()
621# class StreamProxy
622
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000623#------------------------
624# Extraction file object
625#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000626class _FileInFile(object):
627 """A thin wrapper around an existing file object that
628 provides a part of its data as an individual file
629 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000630 """
631
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000632 def __init__(self, fileobj, offset, size, blockinfo=None):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000633 self.fileobj = fileobj
634 self.offset = offset
635 self.size = size
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000636 self.position = 0
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200637 self.name = getattr(fileobj, "name", None)
638 self.closed = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000639
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000640 if blockinfo is None:
641 blockinfo = [(0, size)]
642
643 # Construct a map with data and zero blocks.
644 self.map_index = 0
645 self.map = []
646 lastpos = 0
647 realpos = self.offset
648 for offset, size in blockinfo:
649 if offset > lastpos:
650 self.map.append((False, lastpos, offset, None))
651 self.map.append((True, offset, offset + size, realpos))
652 realpos += size
653 lastpos = offset + size
654 if lastpos < self.size:
655 self.map.append((False, lastpos, self.size, None))
656
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200657 def flush(self):
658 pass
659
660 def readable(self):
661 return True
662
663 def writable(self):
664 return False
665
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000666 def seekable(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000667 return self.fileobj.seekable()
668
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000669 def tell(self):
670 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000671 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000672 return self.position
673
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200674 def seek(self, position, whence=io.SEEK_SET):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000675 """Seek to a position in the file.
676 """
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200677 if whence == io.SEEK_SET:
678 self.position = min(max(position, 0), self.size)
679 elif whence == io.SEEK_CUR:
680 if position < 0:
681 self.position = max(self.position + position, 0)
682 else:
683 self.position = min(self.position + position, self.size)
684 elif whence == io.SEEK_END:
685 self.position = max(min(self.size + position, self.size), 0)
686 else:
687 raise ValueError("Invalid argument")
688 return self.position
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000689
690 def read(self, size=None):
691 """Read data from the file.
692 """
693 if size is None:
694 size = self.size - self.position
695 else:
696 size = min(size, self.size - self.position)
697
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000698 buf = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000699 while size > 0:
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000700 while True:
701 data, start, stop, offset = self.map[self.map_index]
702 if start <= self.position < stop:
703 break
704 else:
705 self.map_index += 1
706 if self.map_index == len(self.map):
707 self.map_index = 0
708 length = min(size, stop - self.position)
709 if data:
Lars Gustäbeldd071042011-02-23 11:42:22 +0000710 self.fileobj.seek(offset + (self.position - start))
711 buf += self.fileobj.read(length)
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000712 else:
713 buf += NUL * length
714 size -= length
715 self.position += length
716 return buf
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000717
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200718 def readinto(self, b):
719 buf = self.read(len(b))
720 b[:len(buf)] = buf
721 return len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000722
723 def close(self):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000724 self.closed = True
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200725#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000726
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member.

       The raw stream is a _FileInFile built from the archive's file
       object and the member's data offset, size and sparse information.
    """

    def __init__(self, tarfile, tarinfo):
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000734
735#------------------
736# Exported Classes
737#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 0o7777 and the name of a directory
           member always ends with a slash.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a bytes object made of 512 byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
           PAX_FORMAT); any other value raises ValueError. encoding and
           errors are used to encode the string fields; errors is not
           used for PAX_FORMAT.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname is longer than LENGTH_LINK
           or if a long name cannot be split into prefix and name.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

           Over-long link names and member names are emitted as
           GNUTYPE_LONGLINK / GNUTYPE_LONGNAME sequences in front of the
           regular header.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that the member's own pax_headers stay untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last slash within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if there is no
           such slash or if the remaining name part is still longer than
           LENGTH_NAME.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field placeholder. It must be exactly 8 bytes so
            # that the fields after it line up with the offsets that
            # frombuf() reads (the checksum lives at bytes 148:156).
            b"        ",
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields with NUL bytes up to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first seven bytes of the placeholder with six octal
        # digits and a NUL; the eighth byte of the placeholder is kept.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The stored name is NUL-terminated.
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the decimal digits of the length
            # itself, so iterate until the value is stable.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raises EmptyHeaderError for an empty buffer,
           TruncatedHeaderError if the buffer is not BLOCKSIZE bytes long,
           EOFHeaderError if it consists of NUL bytes only and
           InvalidHeaderError if the stored checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises SubsequentHeaderError if the header that follows the
           long name blocks cannot be read.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        # The next header is located using the size from the header; only
        # afterwards is self.size replaced by the original file size.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.

           Raises InvalidHeaderError if a record declares a length of
           zero and SubsequentHeaderError if the header that follows the
           pax records cannot be read.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero-length record would never advance `pos' and the
                # loop would parse the same record forever.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

           The offset/numbytes pairs are collected from the raw pax
           records in buf and stored in next.sparse.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

           "GNU.sparse.map" is a comma separated list of numbers that is
           read as alternating offset and size values.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

           The sparse map is read from the archive itself: a newline
           terminated number of entries followed by twice as many newline
           terminated numbers (offset and size of every entry).
        """
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                # The map continues in the next block.
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        # The member's data starts after the blocks that hold the map.
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                self.path = value
            elif keyword == "GNU.sparse.size":
                self.size = int(value)
            elif keyword == "GNU.sparse.realsize":
                self.size = int(value)
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        # Unparsable numbers are replaced by 0.
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

           value is decoded strictly with encoding first; if that fails,
           fallback_encoding and fallback_errors are used instead.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    # Predicates on the member type (issparse() checks self.sparse instead).
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.sparse is not None
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1393# class TarInfo
1394
1395class TarFile(object):
1396 """The TarFile Class provides an interface to tar archives.
1397 """
1398
# Class-level defaults. Most of them can be overridden per instance
# through the keyword arguments of the constructor.

# May be set from 0 (no msgs) to 3 (all msgs).
debug = 0

# If true, add content of linked file to the tar file, else the link.
dereference = False

# If true, skips empty or invalid blocks and continues processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (if debug >= 0).
# If > 0, errors are passed to the caller as exceptions.
errorlevel = 1

# The format to use when creating an archive.
format = DEFAULT_FORMAT

# Encoding for 8-bit character strings.
encoding = ENCODING

# Error handler for unicode conversion.
errors = None

# The default TarInfo class to use.
tarinfo = TarInfo

# The file-object for extractfile().
fileobject = ExFileObject
Guido van Rossumd8faa362007-04-27 19:54:29 +00001420
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       `format', `tarinfo', `dereference', `ignore_zeros', `encoding',
       `debug' and `errorlevel' replace the class-level defaults only when
       they are not None; `errors' is always stored. `pax_headers' is only
       kept when the resulting format is PAX_FORMAT.

       Raises ValueError for an unknown `mode' and ReadError if, in append
       mode, an existing header cannot be parsed.
    """
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        # Only adopt the file object's name when it is an actual path.
        # Some file objects (e.g. those created from a file descriptor)
        # expose an integer `name', which os.path.abspath() below would
        # reject with a TypeError.
        if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Whatever went wrong (including KeyboardInterrupt), do not leak
        # a file object that was opened here, then re-raise unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001517
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001518 #--------------------------------------------------------------------------
1519 # Below are the classmethods which act as alternate constructors to the
1520 # TarFile class. The open() method is the only one that is needed for
1521 # public use; it is the "super"-constructor and is able to select an
1522 # adequate "sub"-constructor for a particular compression using the mapping
1523 # from OPEN_METH.
1524 #
1525 # This concept allows one to subclass TarFile without losing the comfort of
1526 # the super-constructor. A sub-constructor is registered and made available
1527 # by adding it to the mapping in OPEN_METH.
1528
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or the
       mode cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener succeeds.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Try every registered *open() method in turn until one accepts
        # the file.
        for method_name in cls.OPEN_METH.values():
            opener = getattr(cls, method_name)
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so that the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Pick the *open() method registered for the given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream wrapper was created here, so close() must close it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001608
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       Raises ValueError unless `mode' is 'r', 'a' or 'w'; everything
       else is forwarded to the class constructor.
    """
    if mode in ("r", "a", "w"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001616
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for any mode but 'r' and 'w', CompressionError
       if gzip.GzipFile is unavailable, and ReadError if an OSError
       occurs while opening for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if mode == 'r' and fileobj is not None:
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Never leave the GzipFile wrapper open on failure.
        fileobj.close()
        if isinstance(exc, OSError) and mode == 'r':
            raise ReadError("not a gzip file")
        raise
    # The GzipFile wrapper was created here, so close() must close it.
    archive._extfileobj = False
    return archive
1650
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for any mode but 'r' and 'w', CompressionError
       if the bz2 module cannot be imported, and ReadError if an IOError
       or EOFError occurs while opening for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # An explicit file object takes precedence over the name.
    source = fileobj or name
    fileobj = bz2.BZ2File(source, mode, compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Never leave the BZ2File wrapper open on failure.
        fileobj.close()
        if isinstance(exc, (IOError, EOFError)) and mode == 'r':
            raise ReadError("not a bzip2 file")
        raise
    # The BZ2File wrapper was created here, so close() must close it.
    archive._extfileobj = False
    return archive
1679
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for any mode but 'r' and 'w', CompressionError
       if the lzma module cannot be imported, and ReadError if an
       lzma.LZMAError or EOFError occurs while opening for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    # An explicit file object takes precedence over the name.
    source = fileobj or name
    fileobj = lzma.LZMAFile(source, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Never leave the LZMAFile wrapper open on failure.
        fileobj.close()
        if isinstance(exc, (lzma.LZMAError, EOFError)) and mode == 'r':
            raise ReadError("not an lzma file")
        raise
    # The LZMAFile wrapper was created here, so close() must close it.
    archive._extfileobj = False
    return archive
1707
# Registry of all *open() methods: maps a compression type to the name
# of the classmethod that opens archives of that type.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen",    # lzma compressed tar
}
1715
1716 #--------------------------------------------------------------------------
1717 # The public methods which TarFile provides:
1718
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       archive is marked as closed and a file object that was opened by
       the TarFile itself is closed even if writing the finishing blocks
       fails; the write error still propagates to the caller.
    """
    if self.closed:
        return

    # Mark as closed up front so that a failing write below cannot leave
    # the object half-open and trigger a second attempt later.
    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # A file object handed in by the caller stays open.
        if not self._extfileobj:
            self.fileobj.close()
1738
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001749
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been scanned
    # once.
    if not self._loaded:
        self._load()
    return self.members
1759
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001765
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that is none of regular
       file, directory, fifo, symbolic link, character or block device.
    """
    self._check("aw")

    # A given file object dictates the real name of the file.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name used inside the archive: drop the drive,
    # convert backward slashes to forward slashes and turn absolute
    # paths into relative ones.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    info = self.tarinfo()
    info.tarfile = self

    # Stat the file: fstat for file objects, otherwise lstat (if the
    # platform has it and links shall not be followed) or stat.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)

    st_mode = st.st_mode
    linkname = ""

    if stat.S_ISREG(st_mode):
        inode = (st.st_ino, st.st_dev)
        # Is it a hardlink to an already archived file?
        is_known_hardlink = (
            not self.dereference and st.st_nlink > 1 and
            inode in self.inodes and arcname != self.inodes[inode])
        if is_known_hardlink:
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            ftype = REGTYPE
            # The inode is remembered only if it is valid.
            # For win32 it is always 0.
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(st_mode):
        ftype = DIRTYPE
    elif stat.S_ISFIFO(st_mode):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        ftype = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all information we can get.
    info.name = arcname
    info.mode = st_mode
    info.uid = st.st_uid
    info.gid = st.st_gid
    # Only regular files carry data in the archive.
    info.size = st.st_size if ftype == REGTYPE else 0
    info.mtime = st.st_mtime
    info.type = ftype
    info.linkname = linkname

    # Resolve owner names where the platform modules are available;
    # unknown ids simply leave the names untouched.
    if pwd:
        try:
            info.uname = pwd.getpwuid(info.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            info.gname = grp.getgrgid(info.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            info.devmajor = os.major(st.st_rdev)
            info.devminor = os.minor(st.st_rdev)
    return info
1863
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    def emit(*fields):
        # Every column is followed by a single space, never a newline.
        print(*fields, end=' ')

    for member in self:
        if verbose:
            emit(stat.filemode(member.mode))
            emit("%s/%s" % (member.uname or member.uid,
                            member.gname or member.gid))
            # Devices show "major,minor" where other members show a size.
            if member.ischr() or member.isblk():
                emit("%10s" % ("%d,%d" % (member.devmajor, member.devminor)))
            else:
                emit("%10d" % member.size)
            emit("%d-%02d-%02d %02d:%02d:%02d"
                 % time.localtime(member.mtime)[:6])

        emit(member.name + ("/" if member.isdir() else ""))

        if verbose:
            if member.issym():
                emit("->", member.linkname)
            if member.islnk():
                emit("link to", member.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001892
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded; passing it emits a
       DeprecationWarning in favour of `filter'. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames (deprecated interface).
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    member = self.gettarinfo(name, arcname)
    if member is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        member = filter(member)
        if member is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are the only members that carry data.
    if member.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(member, source)
        return

    # Everything else is written as a bare header; directories may
    # additionally pull in their entries.
    self.addfile(member)
    if member.isdir() and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, exclude, filter=filter)
1953
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a shallow copy so the caller's object is not the one that
    # ends up in self.members.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad it with NULs up to
    # the next block boundary.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, leftover = divmod(tarinfo.size, BLOCKSIZE)
        if leftover > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001979
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    if members is None:
        members = self

    directories = []
    for member in members:
        if member.isdir():
            # Remember the original and extract a copy with a safe
            # mode instead.
            directories.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(member, path, set_attrs=not member.isdir())

    # Set correct owner, mtime and filemode on directories, in reverse
    # name order.
    for dirinfo in reversed(sorted(directories, key=lambda d: d.name)):
        dirpath = os.path.join(path, dirinfo.name)
        try:
            self.chown(dirinfo, dirpath)
            self.utime(dirinfo, dirpath)
            self.chmod(dirinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2017
def extract(self, member, path="", set_attrs=True):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'. File attributes (owner,
       mtime, mode) are set unless `set_attrs' is False.

       EnvironmentError is re-raised only if errorlevel > 0 and
       ExtractError only if errorlevel > 1; otherwise the error is
       reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination, set_attrs=set_attrs)
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2052
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, or
       its type is not in SUPPORTED_TYPES, self.fileobject(self, tarinfo)
       is returned. For a link, the file object of the link target is
       returned. Otherwise, None is returned.

       Raises StreamError when a (sym)link is requested while the
       archive is read from a _Stream.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2083
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath. Missing parent directories of
       targetpath are created first. If set_attrs is true, the owner
       is set afterwards and, unless the member is a symbolic link,
       also the mode and the modification time.
    """
    # Build the destination pathname: drop trailing slashes and
    # replace forward slashes with the platform specific separator.
    targetpath = targetpath.rstrip("/")
    targetpath = targetpath.replace("/", os.sep)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # Create directories that are not part of the archive with
        # default permissions. exist_ok=True keeps this from failing
        # if the directory gets created by someone else between the
        # exists() check above and this call.
        os.makedirs(upperdirs, exist_ok=True)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch to the make*() method that matches the member type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    if set_attrs:
        self.chown(tarinfo, targetpath)
        # Mode and mtime are not applied to symbolic links.
        if not tarinfo.issym():
            self.chmod(tarinfo, targetpath)
            self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002126
2127 #--------------------------------------------------------------------------
2128 # Below are the different file methods. They are called via
2129 # _extract_member() when extract() is called. They can be replaced in a
2130 # subclass to implement other functionality.
2131
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. An already existing
       targetpath is silently accepted.
    """
    # Start out with a safe mode for the directory, the real mode is
    # set later in _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002141
def makefile(self, tarinfo, targetpath):
    """Write the data of tarinfo to a new file called targetpath.
    """
    source = self.fileobj
    source.seek(tarinfo.offset_data)
    with bltn_open(targetpath, "wb") as target:
        sparse_map = tarinfo.sparse
        if sparse_map is None:
            copyfileobj(source, target, tarinfo.size)
        else:
            # Each (offset, size) pair describes one run of data that
            # is copied to the given offset of the target file.
            for offset, size in sparse_map:
                target.seek(offset)
                copyfileobj(source, target, size)
        # Cut or extend the file to exactly tarinfo.size bytes.
        target.seek(tarinfo.size)
        target.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002156
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it
       were a regular file, and say so in a debug message.
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2164
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. Raise ExtractError if the
       os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002172
def makedev(self, tarinfo, targetpath):
    """Create a character or block device called targetpath. Raise
       ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        mode = tarinfo.mode | stat.S_IFBLK
    else:
        mode = tarinfo.mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2187
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called targetpath. If the link
       cannot be created (platform limitation), the referenced member
       is extracted to targetpath instead of a link.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared in extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            # The hard link's target is not on disk, so extract the
            # member it refers to under the link's name.
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002210
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       (tarinfo.gname, tarinfo.uname); if a name is unknown to the
       system, the numeric tarinfo.gid or tarinfo.uid is used instead.

       Raise ExtractError, chained to the underlying OS error, if the
       owner cannot be changed.
    """
    if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
        # We have to be root to do so.
        try:
            g = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            g = tarinfo.gid
        try:
            u = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            u = tarinfo.uid
        try:
            if tarinfo.issym() and hasattr(os, "lchown"):
                # Change the link itself, not the file it points to.
                os.lchown(targetpath, u, g)
            else:
                if sys.platform != "os2emx":
                    os.chown(targetpath, u, g)
        except EnvironmentError as e:
            # Keep the OS error as the explicit cause so that errno
            # and filename stay available to the caller.
            raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002232
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       Nothing is done if the os module has no chmod(). Raise
       ExtractError, chained to the underlying OS error, if the mode
       cannot be changed.
    """
    if hasattr(os, 'chmod'):
        try:
            os.chmod(targetpath, tarinfo.mode)
        except EnvironmentError as e:
            # Keep the OS error as the explicit cause so that errno
            # and filename stay available to the caller.
            raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002241
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       tarinfo.mtime is used for both the access and the modification
       time. Nothing is done if the os module has no utime(). Raise
       ExtractError, chained to the underlying OS error, if the time
       cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError as e:
        # Keep the OS error as the explicit cause so that errno and
        # filename stay available to the caller.
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002251
2252 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.
    """
    self._check("ra")

    # If firstmember is set, hand it out (once) before reading on.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                # Skip the block and try again one block further on.
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            if self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        break

    if tarinfo is None:
        # Nothing more could be read: the member list is complete.
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002298
2299 #--------------------------------------------------------------------------
2300 # Little helper methods:
2301
def _getmember(self, name, tarinfo=None, normalize=False):
    """Find an archive member by name, searching from bottom to top.
       If tarinfo is given, only the members in front of it are
       searched. If normalize is true, names are compared after
       os.path.normpath(). Return None if nothing matches.
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        wanted = os.path.normpath(name)
        for member in reversed(candidates):
            if wanted == os.path.normpath(member.name):
                return member
    else:
        for member in reversed(candidates):
            if name == member.name:
                return member

    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002324
def _load(self):
    """Read through the entire archive file by calling next() until
       it returns None, then mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2334
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when mode is given,
       if the TarFile's own mode is not contained in mode.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002343
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       tarinfo refers to. Raise KeyError if there is no such member.
    """
    if tarinfo.issym():
        # A symlink's linkname is joined to the directory of the link
        # itself. Always search the entire archive.
        parent = os.path.dirname(tarinfo.name)
        pieces = [piece for piece in (parent, tarinfo.linkname) if piece]
        linkname = "/".join(pieces)
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    member = self._getmember(linkname, tarinfo=limit, normalize=True)
    if member is None:
        raise KeyError("linkname %r not found" % linkname)
    return member
2362
def __iter__(self):
    """Provide an iterator object: a plain iterator over the member
       list once it is completely loaded, a TarIter otherwise.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2370
def _dbg(self, level, msg):
    """Print msg to sys.stderr if level does not exceed the debug
       level of the TarFile.
    """
    if level <= self.debug:
        stream = sys.stderr
        print(msg, file=stream)
Lars Gustäbel01385812010-03-03 12:08:54 +00002376
def __enter__(self):
    """Enter the runtime context: run _check() (which is expected to
       raise if the TarFile cannot be used) and return the TarFile
       itself.
    """
    self._check()
    return self
2380
def __exit__(self, type, value, traceback):
    """Leave the runtime context. Without an exception the TarFile is
       closed normally; with one, only the underlying file object is
       closed (unless _extfileobj is set) and the TarFile is marked
       as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002390# class TarFile
2391
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object that starts at the first member
           of tarfile.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item, taken from the TarFile's member list
           if it is already there and from its next() method
           otherwise. When all members have been read, set TarFile as
           _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which would cause TarIter to stop prematurely. Members
        # that are already in the list are therefore picked up by
        # index.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            tarinfo = archive.next()
        elif self.index < len(archive.members):
            tarinfo = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return tarinfo
2429
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002430#--------------------
2431# exported functions
2432#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    try:
        archive = open(name)
        archive.close()
    except TarError:
        return False
    return True
2443
# Keep the current binding of open (presumably the builtin -- confirm
# against the top of the file) reachable as bltn_open, because the
# module-level name open is rebound to TarFile.open right below.
# The order of these two statements matters.
bltn_open = open
open = TarFile.open