blob: 81b13a678f7df9da5722f7e42fd08082fd25fcfd [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.  The "$...$" values are revision-control keyword
# placeholders and are kept verbatim.
__version__ = "$Revision$"
__date__ = "$Date$"
__cvsid__ = "$Id$"

version = "0.9.0"

# The author and credit strings use \u escapes so that the source file
# itself stays pure ASCII.
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."

Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000039
40#---------
41# Imports
42#---------
43import sys
44import os
45import shutil
46import stat
47import errno
48import time
49import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000050import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000051import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000052
53try:
54 import grp, pwd
55except ImportError:
56 grp = pwd = None
57
58# from tarfile import *
59__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
60
Georg Brandl1a3284e2007-12-02 09:40:06 +000061from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000062
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000063#---------------------------------------------------------
64# tar constants
65#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic strings fill the 8 bytes of the header's magic + version
# fields.  GNU tar writes "ustar" followed by TWO spaces and a NUL,
# POSIX writes "ustar", a NUL and the version "00".
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one-byte type flag in a member header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
131
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000132#---------------------------------------------------------
133# Bits used in the mode field, values in octal.
134#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

# Special permission bits.
TSUID = 0o4000            # set UID on execution
TSGID = 0o2000            # set GID on execution
TSVTX = 0o1000            # reserved

# Permission bits for owner, group and other.
TUREAD = 0o400            # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC = 0o100            # execute/search by owner
TGREAD = 0o040            # read by group
TGWRITE = 0o020           # write by group
TGEXEC = 0o010            # execute/search by group
TOREAD = 0o004            # read by other
TOWRITE = 0o002           # write by other
TOEXEC = 0o001            # execute/search by other

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Default encoding for member names; fall back to ASCII when the
# interpreter does not report a filesystem encoding.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = "ascii"
Guido van Rossumd8faa362007-04-27 19:54:29 +0000162
163#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164# Some useful functions
165#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000166
def stn(s, length, encoding, errors):
    """Encode the string s into a bytes field of the given length.

       The encoded value is cut off when it is too long and padded
       with NUL bytes when it is too short.
    """
    field = s.encode(encoding, errors)[:length]
    return field.ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000172
def nts(s, encoding, errors):
    """Decode a bytes field to a string, stopping at the first NUL
       byte if there is one.
    """
    text, _, _ = s.partition(b"\0")
    return text.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000180
def nti(s):
    """Convert a number field to a python number.

       s is the raw bytes of the header field.  Raises
       InvalidHeaderError if the field is not a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    # Indexing a bytes object yields an int, so the marker byte has to
    # be compared with the integer 0o200 (not with a one-character str).
    if s[0] != 0o200:
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        # GNU base-256 encoding: the bytes after the marker are a
        # big-endian binary representation of the number.
        n = 0
        for byte in s[1:]:
            n = (n << 8) + byte
    return n
197
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

       Raises ValueError if the number does not fit into the field
       with the encodings allowed by format.
    """
    # POSIX 1003.1-1988 stores a number as octal digits terminated by a
    # null-byte, which covers values up to (8**(digits-1))-1.  GNU tar
    # additionally supports a binary form for larger values: a leading
    # 0o200 byte followed by digits-1 bytes holding the number in
    # big-endian order, good for values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, add the marker byte
    # and then flip the whole field into big-endian order.
    field = bytearray()
    for _ in range(width):
        field.append(n & 0o377)
        n >>= 8
    field.append(0o200)
    field.reverse()
    return field
224
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the header
       block in buf.

       All bytes of the 512-byte header are summed up, except for the
       8-byte chksum field at offset 148 which counts as if it were
       filled with spaces (8 * 32 == 256).  According to the GNU tar
       sources some tars (Sun and NeXT) sum the bytes as signed chars,
       which gives a different result when bytes with the high bit set
       are present, hence both variants are computed.
    """
    # "8x" skips over the chksum field itself.
    unsigned_bytes = struct.unpack_from("148B8x356B", buf)
    signed_bytes = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(unsigned_bytes), 256 + sum(signed_bytes)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000237
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       have been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(count):
        # Move exactly count bytes or fail.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)
    for _ in range(full_chunks):
        transfer(BUFSIZE)
    if tail:
        transfer(tail)
    return
262
# One group per character of the mode string.  Within a group the
# entries are tried in order and the first one whose bits are all set
# supplies the character.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000289
def filemode(mode):
    """Render the mode bits of a file as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def symbol_for(group):
        # The first entry whose bits are all set in mode wins, a dash
        # stands for "none of them".
        for mask, symbol in group:
            if mode & mask == mask:
                return symbol
        return "-"

    return "".join(symbol_for(group) for group in filemode_table)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000304
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""

Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000338
339#---------------------------
340# internal stream interface
341#---------------------------
342class _LowLevelFile:
343 """Low-level file object. Supports reading and writing.
344 It is used instead of a regular file object for streaming
345 access.
346 """
347
348 def __init__(self, name, mode):
349 mode = {
350 "r": os.O_RDONLY,
351 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
352 }[mode]
353 if hasattr(os, "O_BINARY"):
354 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000355 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000356
357 def close(self):
358 os.close(self.fd)
359
360 def read(self, size):
361 return os.read(self.fd, size)
362
363 def write(self, s):
364 os.write(self.fd, s)
365
366class _Stream:
367 """Class that serves as an adapter between TarFile and
368 a stream-like object. The stream-like object only
369 needs to have a read() or write() method and is accessed
370 blockwise. Use of gzip or bzip2 compression is possible.
371 A stream-like object could be for example: sys.stdin,
372 sys.stdout, a socket, a tape device etc.
373
374 _Stream is intended to be used only internally.
375 """
376
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000377 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000378 """Construct a _Stream object.
379 """
380 self._extfileobj = True
381 if fileobj is None:
382 fileobj = _LowLevelFile(name, mode)
383 self._extfileobj = False
384
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000385 if comptype == '*':
386 # Enable transparent compression detection for the
387 # stream interface
388 fileobj = _StreamProxy(fileobj)
389 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000390
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000391 self.name = name or ""
392 self.mode = mode
393 self.comptype = comptype
394 self.fileobj = fileobj
395 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000396 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000397 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000398 self.closed = False
399
400 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000401 try:
402 import zlib
403 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000404 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000405 self.zlib = zlib
Antoine Pitrou77b338b2009-12-14 18:00:06 +0000406 self.crc = zlib.crc32(b"")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000407 if mode == "r":
408 self._init_read_gz()
409 else:
410 self._init_write_gz()
411
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000412 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000413 try:
414 import bz2
415 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000416 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000417 if mode == "r":
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000418 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000419 self.cmp = bz2.BZ2Decompressor()
420 else:
421 self.cmp = bz2.BZ2Compressor()
422
423 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000424 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000425 self.close()
426
427 def _init_write_gz(self):
428 """Initialize for writing with gzip compression.
429 """
430 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
431 -self.zlib.MAX_WBITS,
432 self.zlib.DEF_MEM_LEVEL,
433 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000434 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000435 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000436 if self.name.endswith(".gz"):
437 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000438 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
439 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000440
441 def write(self, s):
442 """Write string s to the stream.
443 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000444 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000445 self.crc = self.zlib.crc32(s, self.crc)
446 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000447 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000448 s = self.cmp.compress(s)
449 self.__write(s)
450
451 def __write(self, s):
452 """Write string s to the stream if a whole new block
453 is ready to be written.
454 """
455 self.buf += s
456 while len(self.buf) > self.bufsize:
457 self.fileobj.write(self.buf[:self.bufsize])
458 self.buf = self.buf[self.bufsize:]
459
460 def close(self):
461 """Close the _Stream object. No operation should be
462 done on it afterwards.
463 """
464 if self.closed:
465 return
466
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000467 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000468 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000469
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000470 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000471 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000472 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000473 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000474 # The native zlib crc is an unsigned 32-bit integer, but
475 # the Python wrapper implicitly casts that to a signed C
476 # long. So, on a 32-bit box self.crc may "look negative",
477 # while the same crc on a 64-bit box may "look positive".
478 # To avoid irksome warnings from the `struct` module, force
479 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000480 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
481 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000482
483 if not self._extfileobj:
484 self.fileobj.close()
485
486 self.closed = True
487
488 def _init_read_gz(self):
489 """Initialize for reading a gzip compressed fileobj.
490 """
491 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000492 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000493
494 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000495 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000496 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000497 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000498 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000499
500 flag = ord(self.__read(1))
501 self.__read(6)
502
503 if flag & 4:
504 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
505 self.read(xlen)
506 if flag & 8:
507 while True:
508 s = self.__read(1)
509 if not s or s == NUL:
510 break
511 if flag & 16:
512 while True:
513 s = self.__read(1)
514 if not s or s == NUL:
515 break
516 if flag & 2:
517 self.__read(2)
518
519 def tell(self):
520 """Return the stream's file pointer position.
521 """
522 return self.pos
523
524 def seek(self, pos=0):
525 """Set the stream's file pointer to pos. Negative seeking
526 is forbidden.
527 """
528 if pos - self.pos >= 0:
529 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000530 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000531 self.read(self.bufsize)
532 self.read(remainder)
533 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000534 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000535 return self.pos
536
537 def read(self, size=None):
538 """Return the next size number of bytes from the stream.
539 If size is not defined, return all bytes of the stream
540 up to EOF.
541 """
542 if size is None:
543 t = []
544 while True:
545 buf = self._read(self.bufsize)
546 if not buf:
547 break
548 t.append(buf)
549 buf = "".join(t)
550 else:
551 buf = self._read(size)
552 self.pos += len(buf)
553 return buf
554
555 def _read(self, size):
556 """Return size bytes from the stream.
557 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000558 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000559 return self.__read(size)
560
561 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000562 while c < size:
563 buf = self.__read(self.bufsize)
564 if not buf:
565 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000566 try:
567 buf = self.cmp.decompress(buf)
568 except IOError:
569 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000570 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000571 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000572 buf = self.dbuf[:size]
573 self.dbuf = self.dbuf[size:]
574 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000575
576 def __read(self, size):
577 """Return size bytes from stream. If internal buffer is empty,
578 read another block from the stream.
579 """
580 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000581 while c < size:
582 buf = self.fileobj.read(self.bufsize)
583 if not buf:
584 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000585 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000586 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000587 buf = self.buf[:size]
588 self.buf = self.buf[size:]
589 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000590# class _Stream
591
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000592class _StreamProxy(object):
593 """Small proxy class that enables transparent compression
594 detection for the Stream interface (mode 'r|*').
595 """
596
597 def __init__(self, fileobj):
598 self.fileobj = fileobj
599 self.buf = self.fileobj.read(BLOCKSIZE)
600
601 def read(self, size):
602 self.read = self.fileobj.read
603 return self.buf
604
605 def getcomptype(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000606 if self.buf.startswith(b"\037\213\010"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000607 return "gz"
Lars Gustäbela280ca752007-08-28 07:34:33 +0000608 if self.buf.startswith(b"BZh91"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000609 return "bz2"
610 return "tar"
611
612 def close(self):
613 self.fileobj.close()
614# class StreamProxy
615
Thomas Wouters477c8d52006-05-27 19:21:47 +0000616class _BZ2Proxy(object):
617 """Small proxy class that enables external file object
618 support for "r:bz2" and "w:bz2" modes. This is actually
619 a workaround for a limitation in bz2 module's BZ2File
620 class which (unlike gzip.GzipFile) has no support for
621 a file object argument.
622 """
623
624 blocksize = 16 * 1024
625
626 def __init__(self, fileobj, mode):
627 self.fileobj = fileobj
628 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000629 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000630 self.init()
631
632 def init(self):
633 import bz2
634 self.pos = 0
635 if self.mode == "r":
636 self.bz2obj = bz2.BZ2Decompressor()
637 self.fileobj.seek(0)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000638 self.buf = b""
Thomas Wouters477c8d52006-05-27 19:21:47 +0000639 else:
640 self.bz2obj = bz2.BZ2Compressor()
641
642 def read(self, size):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000643 x = len(self.buf)
644 while x < size:
Lars Gustäbel42e00912009-03-22 20:34:29 +0000645 raw = self.fileobj.read(self.blocksize)
646 if not raw:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000647 break
Lars Gustäbel42e00912009-03-22 20:34:29 +0000648 data = self.bz2obj.decompress(raw)
649 self.buf += data
Thomas Wouters477c8d52006-05-27 19:21:47 +0000650 x += len(data)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000651
652 buf = self.buf[:size]
653 self.buf = self.buf[size:]
654 self.pos += len(buf)
655 return buf
656
657 def seek(self, pos):
658 if pos < self.pos:
659 self.init()
660 self.read(pos - self.pos)
661
662 def tell(self):
663 return self.pos
664
665 def write(self, data):
666 self.pos += len(data)
667 raw = self.bz2obj.compress(data)
668 self.fileobj.write(raw)
669
670 def close(self):
671 if self.mode == "w":
672 raw = self.bz2obj.flush()
673 self.fileobj.write(raw)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000674# class _BZ2Proxy
675
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000676#------------------------
677# Extraction file object
678#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000679class _FileInFile(object):
680 """A thin wrapper around an existing file object that
681 provides a part of its data as an individual file
682 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000683 """
684
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000685 def __init__(self, fileobj, offset, size, sparse=None):
686 self.fileobj = fileobj
687 self.offset = offset
688 self.size = size
689 self.sparse = sparse
690 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000691
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000692 def seekable(self):
693 if not hasattr(self.fileobj, "seekable"):
694 # XXX gzip.GzipFile and bz2.BZ2File
695 return True
696 return self.fileobj.seekable()
697
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000698 def tell(self):
699 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000700 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000701 return self.position
702
703 def seek(self, position):
704 """Seek to a position in the file.
705 """
706 self.position = position
707
708 def read(self, size=None):
709 """Read data from the file.
710 """
711 if size is None:
712 size = self.size - self.position
713 else:
714 size = min(size, self.size - self.position)
715
716 if self.sparse is None:
717 return self.readnormal(size)
718 else:
719 return self.readsparse(size)
720
721 def readnormal(self, size):
722 """Read operation for regular files.
723 """
724 self.fileobj.seek(self.offset + self.position)
725 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000726 return self.fileobj.read(size)
727
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000728 def readsparse(self, size):
729 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000730 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000731 data = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000732 while size > 0:
733 buf = self.readsparsesection(size)
734 if not buf:
735 break
736 size -= len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000737 data += buf
738 return data
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000739
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000740 def readsparsesection(self, size):
741 """Read a single section of a sparse file.
742 """
743 section = self.sparse.find(self.position)
744
745 if section is None:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000746 return b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000747
748 size = min(size, section.offset + section.size - self.position)
749
750 if isinstance(section, _data):
751 realpos = section.realpos + self.position - section.offset
752 self.fileobj.seek(self.offset + realpos)
753 self.position += size
754 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000755 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000756 self.position += size
757 return NUL * size
758#class _FileInFile
759
760
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       Data is pulled from a _FileInFile wrapper. readline() reads ahead
       in chunks of `blocksize` bytes and keeps the surplus in
       self.buffer, so self.position (the caller-visible position) may
       lag behind the position of the underlying file object.
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by tarinfo inside
           the archive opened by tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   tarinfo.sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0       # caller-visible read position
        self.buffer = b""       # read-ahead data not yet handed out

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raise ValueError if the file object is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "read everything", like in the io
        # module. Slicing the buffer with it or passing it down to the
        # underlying file object would return the wrong data.
        if size is not None and size < 0:
            size = None

        if size is None:
            buf = self.buffer + self.fileobj.read()
            self.buffer = b""
        else:
            # Serve buffered read-ahead data first, then fetch the rest.
            buf = self.buffer[:size]
            self.buffer = self.buffer[size:]
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a bytes object with at most that
           size, which may be an incomplete line. None or a negative
           size means no limit.

           Raise ValueError if the file object is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # no newline found in the buffer; read ahead block by block
            # until a newline shows up or EOF is reached.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # no newline found, return what is left.
                        pos = len(self.buffer)
                    break

        # Only a real, non-negative limit may shorten the line;
        # min() with None raises TypeError and a negative value would
        # cut bytes off the end of the line.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        return list(self)

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file and return the new position.
           The resulting position is clamped to the range 0..self.size.

           Raise ValueError if the file object is closed or whence is
           not one of os.SEEK_SET, os.SEEK_CUR and os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # The read-ahead buffer belongs to the old position.
        self.buffer = b""
        self.fileobj.seek(self.position)
        return self.position

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000897#class ExFileObject
898
899#------------------
900# Exported Classes
901#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
           The mode is masked with 0o7777 and the name of a directory
           member always ends with a slash.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a bytes object of 512 byte blocks.
           format must be one of USTAR_FORMAT, GNU_FORMAT and
           PAX_FORMAT, otherwise ValueError is raised. encoding and
           errors are not used for PAX_FORMAT.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
           Raise ValueError if the linkname or the name is too long
           to be stored.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
           Over-long names and linknames are stored in extra
           GNU longname/longlink blocks in front of the header.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part. The split happens at a slash, which is
           stored in neither part. Raise ValueError if no suitable
           split position exists.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field. The placeholder has to be exactly 8 bytes
            # wide (header offsets 148-155, see frombuf()), otherwise
            # all following fields are shifted.
            b"        ",
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Write the checksum as six octal digits plus NUL into the
        # first seven bytes of the checksum field; the eighth byte of
        # the placeholder is kept.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        records = []
        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            length = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the value no longer changes.
            n = p = 0
            while True:
                n = length + len(str(p))
                if n == p:
                    break
                p = n
            records.append(bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n")
        records = b"".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raise EmptyHeaderError for an empty buffer,
           TruncatedHeaderError for a buffer of the wrong length,
           EOFHeaderError for a block of NUL bytes and
           InvalidHeaderError if the checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
           Raise SubsequentHeaderError if the header that follows
           is missing or bad.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])

        # Transform the sparse structures to something we can use
        # in ExFileObject.
        self.sparse = _ringbuffer()
        lastpos = 0
        realpos = 0
        for offset, numbytes in structs:
            if offset > lastpos:
                self.sparse.append(_hole(lastpos, offset - lastpos))
            self.sparse.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
        if lastpos < origsize:
            self.sparse.append(_hole(lastpos, origsize - lastpos))

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the size taken from the header here; it
        # is replaced by origsize only after the next offset is known.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.
           Raise InvalidHeaderError for a record with a length of
           zero and SubsequentHeaderError if the header that follows
           is missing or bad.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero-length record would never advance pos and
                # make this loop spin forever on a crafted archive.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
           encoding and errors are accepted but not used here.
        """
        for keyword, value in pax_headers.items():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    # Fall back to 0 for values that cannot be converted.
                    value = 0

            # "path" and "linkpath" are properties that map to the
            # name and linkname attributes.
            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1463# class TarInfo
1464
1465class TarFile(object):
1466 """The TarFile Class provides an interface to tar archives.
1467 """
1468
1469 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1470
1471 dereference = False # If true, add content of linked file to the
1472 # tar file, else the link.
1473
1474 ignore_zeros = False # If true, skips empty or invalid blocks and
1475 # continues processing.
1476
Lars Gustäbel365aff32009-12-13 11:42:29 +00001477 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001478 # messages (if debug >= 0). If > 0, errors
1479 # are passed to the caller as exceptions.
1480
Guido van Rossumd8faa362007-04-27 19:54:29 +00001481 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001482
Guido van Rossume7ba4952007-06-06 23:52:48 +00001483 encoding = ENCODING # Encoding for 8-bit character strings.
1484
1485 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001486
Guido van Rossumd8faa362007-04-27 19:54:29 +00001487 tarinfo = TarInfo # The default TarInfo class to use.
1488
1489 fileobject = ExFileObject # The default ExFileObject class to use.
1490
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining keyword arguments override the class-level defaults
       of the same name when they are not None; `errors' is always stored.
       `pax_headers' is only kept when the effective format is PAX_FORMAT.

       Raises ValueError for any `mode' other than 'r', 'a' or 'w', and
       ReadError if an archive opened for appending has an invalid header.
    """
    # Map each tar mode to the mode used to open the underlying file.
    # Membership is tested against this mapping instead of a substring
    # test on "raw", which let the empty string through and then failed
    # with a KeyError rather than the documented ValueError.
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist before next() is called.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately catches everything: release a file opened here
        # before handing the original exception back to the caller.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001586
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001587 #--------------------------------------------------------------------------
1588 # Below are the classmethods which act as alternate constructors to the
1589 # TarFile class. The open() method is the only one that is needed for
1590 # public use; it is the "super"-constructor and is able to select an
1591 # adequate "sub"-constructor for a particular compression using the mapping
1592 # from OPEN_METH.
1593 #
1594 # This concept allows one to subclass TarFile without losing the comfort of
1595 # the super-constructor. A sub-constructor is registered and made available
1596 # by adding it to the mapping in OPEN_METH.
1597
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener accepts
       the file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against the exact modes: a substring test would accept
        # "rw" and only fail after the stream had already been created.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # The constructor treats the stream as an external file object
            # and will not close it, so release it here before re-raising.
            stream.close()
            raise
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001670
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else raises
       ValueError. All other arguments are passed on to the class
       constructor, whose result is returned.
    """
    # Exact comparison: a substring test against "raw" would accept "".
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001678
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       If `fileobj' is None the file `name' is opened here; otherwise
       `fileobj' is wrapped in a gzip.GzipFile. Raises ValueError for a
       mode other than 'r' or 'w', CompressionError if the gzip module
       is unusable and ReadError if opening the archive fails with
       IOError.
    """
    # Exact comparison: a substring test against "rw" would accept "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the raw file is opened here so that it can be
    # closed again if the archive turns out to be unreadable; a file
    # object supplied by the caller is never closed.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1704
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       A given `fileobj' is wrapped in _BZ2Proxy; otherwise `name' is
       opened with bz2.BZ2File. Raises ValueError for a mode other than
       'r' or 'w', CompressionError if the bz2 module cannot be imported
       and ReadError if opening the archive fails with IOError or
       EOFError.
    """
    # Exact comparison: a substring test against "rw" would accept "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here is closed on failure; a proxy around a
    # caller-supplied file object is left alone.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1729
# All *open() methods are registered here.
# Keys are the compression type names accepted after ':' in the mode
# string of open(); values are the names of the classmethods that open()
# looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1736
1737 #--------------------------------------------------------------------------
1738 # The public methods which TarFile provides:
1739
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the output is padded with zeros up to
       a multiple of RECORDSIZE. Closing an already closed TarFile does
       nothing. The file object is only closed if it is not an external
       one.
    """
    if self.closed:
        return

    if self.mode in "aw":
        # End-of-archive marker: two empty blocks.
        trailer_size = BLOCKSIZE * 2
        self.fileobj.write(NUL * trailer_size)
        self.offset += trailer_size
        # fill up the end with zero-blocks
        # (like option -b20 for tar does)
        leftover = self.offset % RECORDSIZE
        if leftover > 0:
            self.fileobj.write(NUL * (RECORDSIZE - leftover))

    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1759
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised when the archive has no such member. If a
       member occurs more than once in the archive, its last occurrence
       is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001770
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete member list requires that the whole archive has been
    # scanned once.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1780
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001786
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive. None is returned for a file whose type is
       none of regular file, directory, fifo, symbolic link, character
       device or block device.
    """
    self._check("aw")

    # An open file object takes precedence: its own name is used.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: default to the file name, drop any drive
    # prefix, use forward slashes and make the path relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # The TarInfo object that is filled in below.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Stat the open file if there is one; otherwise use lstat where it
    # exists and links are not to be followed, else stat.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)
    linkname = ""

    mode = st.st_mode
    if stat.S_ISREG(mode):
        inode = (st.st_ino, st.st_dev)
        # Is it a hardlink to an already archived file?
        archived_elsewhere = (not self.dereference and st.st_nlink > 1 and
                              inode in self.inodes and
                              arcname != self.inodes[inode])
        if archived_elsewhere:
            membertype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            membertype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(mode):
        membertype = DIRTYPE
    elif stat.S_ISFIFO(mode):
        membertype = FIFOTYPE
    elif stat.S_ISLNK(mode):
        membertype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(mode):
        membertype = CHRTYPE
    elif stat.S_ISBLK(mode):
        membertype = BLKTYPE
    else:
        return None

    # Copy everything the stat result provides into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data.
    tarinfo.size = st.st_size if stat.S_ISREG(mode) else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = membertype
    tarinfo.linkname = linkname

    # Resolve numeric ids to names if the lookup modules are available;
    # ids without an entry simply keep the default name.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if membertype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(st.st_rdev)
            tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1884
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            print(filemode(member.mode), end=' ')
            owner = member.uname or member.uid
            group = member.gname or member.gid
            print("%s/%s" % (owner, group), end=' ')
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            stamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        # Directories are marked with a trailing slash.
        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001913
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.

       Passing `exclude' emits a DeprecationWarning. The archive file
       itself and files of an unsupported type are skipped silently
       (apart from a debug message).
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # The with-block closes the source file even if addfile() raises.
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
1975
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a shallow copy; the caller's object is not the one stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad it with NULs up to
    # the next multiple of BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        padding = -member.size % BLOCKSIZE
        if padding:
            self.fileobj.write(NUL * padding)
        self.offset += member.size + padding

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002001
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    deferred = []

    for tarinfo in (self if members is None else members):
        if tarinfo.isdir():
            # Extract directories with a safe mode; the original
            # TarInfo is kept so its attributes can be applied later.
            deferred.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path)

    # Set correct owner, mtime and filemode on directories, walking
    # them in reverse name order.
    for tarinfo in reversed(sorted(deferred, key=lambda d: d.name)):
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2038
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.

       EnvironmentError is re-raised when errorlevel > 0 and ExtractError
       when errorlevel > 1; otherwise the error is only reported as a
       debug message.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination)
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2071
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Regular files, and members of an unknown type (which are treated
    # as regular files), are served directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
2110
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.
    """
    # Build the destination pathname: drop trailing slashes and replace
    # forward slashes by the platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create all upper directories. Directories that are not part of
    # the archive get default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    is_link = tarinfo.islnk() or tarinfo.issym()
    if is_link:
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch on the member type; the order of the checks matters.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    # Owner is set for every member; mode and mtime are skipped for
    # symbolic links.
    self.chown(tarinfo, targetpath)
    if tarinfo.issym():
        return
    self.chmod(tarinfo, targetpath)
    self.utime(tarinfo, targetpath)
2152
2153 #--------------------------------------------------------------------------
2154 # Below are the different file methods. They are called via
2155 # _extract_member() when extract() is called. They can be replaced in a
2156 # subclass to implement other functionality.
2157
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

       An already existing targetpath is not an error; every other
       EnvironmentError is passed on to the caller.
    """
    try:
        # The directory gets a safe mode here, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2168
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The data of tarinfo is read through self.extractfile() and
       copied to targetpath, which is opened in "wb" mode. Both file
       objects are closed even if opening the target or copying the
       data fails; the error itself is passed on to the caller.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2177
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath.

       The member is written with makefile() and a level 1 debug
       message about the unknown type is emitted afterwards.
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file." \
           % tarinfo.type
    self._dbg(1, note)
2185
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

       Raises ExtractError if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002193
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

       Raises ExtractError if the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Add the file type bit to the permission bits of the member:
    # block device if isblk() says so, character device otherwise.
    mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2208
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       The copy is taken from the archive member the link points to. If
       that fails, the referenced path is copied from the file system.
       Raises IOError if that copy fails as well.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # An attribute used above is missing (e.g. os.symlink() or
        # os.link()), fall back to copying the referenced file.
        if tarinfo.issym():
            # A symbolic link is relative to the directory of the
            # member. Join only the non-empty parts, so that a link in
            # the root of the archive does not get a leading "/" which
            # would turn the name into an absolute path.
            parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
            linkpath = "/".join([part for part in parts if part])
        else:
            linkpath = tarinfo.linkname

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # The member is not in the archive or could not be
            # extracted, try a file with that name on the file system.
            linkpath = linkpath.replace("/", os.sep)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002235
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing happens unless the pwd module is available and the
       effective user id is 0. Raises ExtractError if the owner
       cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Group: look up by name first, then by numeric id, and finally
    # use the group of the current process.
    try:
        group_id = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            group_id = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            group_id = os.getgid()

    # User: same strategy as for the group.
    try:
        user_id = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            user_id = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            user_id = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, user_id, group_id)
        elif sys.platform != "os2emx":
            os.chown(targetpath, user_id, group_id)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002263
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       Does nothing if the os module has no chmod(). Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, "chmod"):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002272
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       tarinfo.mtime is used for both the access and the modification
       time. Does nothing if the os module has no utime(). Raises
       ExtractError if the time cannot be changed.
    """
    if hasattr(os, "utime"):
        stamp = tarinfo.mtime
        try:
            os.utime(targetpath, (stamp, stamp))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002282
2283 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError for header errors at offset 0 and for every
       SubsequentHeaderError. A member that was read is appended to
       self.members; when None is returned, self._loaded is set.
    """
    # Only allowed while the archive is open and its mode is in "ra".
    self._check("ra")
    # A member stored in self.firstmember is handed out exactly once.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            # With ignore_zeros set, log the error, skip one block and
            # try again. Otherwise fall through to the break below.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            # Skipped like above with ignore_zeros. Without it the
            # error is only reported at offset 0; at any other offset
            # the loop ends with tarinfo still being None.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            # Only an error at offset 0.
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            # Only an error at offset 0.
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            # Always an error, regardless of the offset.
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more could be read, mark the member list as complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002329
2330 #--------------------------------------------------------------------------
2331 # Little helper methods:
2332
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.
       If tarinfo is given, it is used as the starting point.

       Only members in front of tarinfo are searched then. Return the
       last member with a matching name, or None if there is none.
    """
    # Ensure that all members have been loaded.
    candidates = self.getmembers()

    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002348
def _load(self):
    """Read through the entire archive file and look for readable
       members.

       self.next() is called until it returns None, after that
       self._loaded is set.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2358
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode.

       mode is a container of allowed modes (e.g. the string "ra"),
       None disables the mode check. Raises IOError if a check fails.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None or self.mode in mode:
        return
    raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002367
def __iter__(self):
    """Provide an iterator object.

       If all members have been loaded already, this is an iterator
       over self.members, otherwise a TarIter object.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2375
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       msg is only printed if level is not higher than self.debug.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002381
def __enter__(self):
    """Enter the runtime context and return the TarFile itself.

       self._check() is called first.
    """
    self._check()
    return self
2385
def __exit__(self, type, value, traceback):
    """Leave the runtime context.

       Without an exception the archive is closed with close(). With
       an exception the file object is closed directly (unless
       self._extfileobj is set) and the TarFile is marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002395# class TarFile
2396
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        # Number of members handed out so far.
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            # Everything is in the member list already, continue
            # from our own position in it.
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2432
2433# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.

       A section starts at offset and is size units long.
       "x in section" is true for offset <= x < offset + size.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        start = self.offset
        return start <= offset < start + self.size
2442
class _data(_section):
    """Represent a data section in a sparse file.

       realpos is stored unchanged next to offset and size.
       NOTE(review): presumably the position of the data in the
       archive -- confirm against the code that uses it.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2449
class _hole(_section):
    """Represent a hole section in a sparse file.

       Adds nothing to _section, the class only serves to tell
       holes and data sections apart.
    """
2454
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts at the position of the last hit instead of at
       the beginning of the list.
    """
    def __init__(self):
        # Index of the item that matched the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, starting at the last hit, that
           contains offset. Return None if no item contains it or
           if the buffer is empty.
        """
        if not self:
            # Nothing to search; indexing below would raise IndexError.
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                # Wrap around to the beginning of the list.
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2475
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002476#--------------------
2477# exported functions
2478#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       Only TarError is turned into False, other exceptions are
       passed on to the caller.
    """
    # open is rebound to TarFile.open at the bottom of this module.
    try:
        open(name).close()
    except TarError:
        return False
    return True
2489
# Keep a reference to the built-in open() before the name is rebound
# below; makefile() uses bltn_open to write the extracted data.
bltn_open = open
# From here on the module-level name open refers to TarFile.open. This
# is the function that is_tarfile() calls, because the global name is
# looked up at call time. The order of these two statements matters.
open = TarFile.open