blob: b0af5b15b3ea9e87d9b6c0b626ef029e0edd350c [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
# Module metadata.  The "$...$" values look like version-control keyword
# placeholders -- presumably expanded at checkout time; verify before
# relying on their contents.
__version__ = "$Revision$"
# $Source$

# Version string of the tarfile module itself.
version = "0.9.0"
__author__ = "Lars Gustäbel (lars@gustaebel.de)"
__date__ = "$Date$"
__cvsid__ = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000053import re
Brett Cannon132fc542008-08-04 21:23:07 +000054import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    #
    # Refuse to import at all rather than fail later in confusing ways.
    # The exception is raised with the call form used everywhere else in
    # this module.
    raise ImportError("tarfile does not work for platform==mac")
62
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000063try:
64 import grp, pwd
65except ImportError:
66 grp = pwd = None
67
# from tarfile import *
# Names exported by a star import of this module.
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
70
71#---------------------------------------------------------
72# tar constants
73#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic strings fill the 8 bytes of the header's magic + version
# fields.  The GNU variant is "ustar", TWO spaces and a NUL; with only one
# space the value is 7 bytes long and no longer matches GNU archives.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000079
# Maximum sizes of the variable-length text fields.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# One-character member type flags.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# Type flags specific to GNU tar.
GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

# Type flags of extended headers.
XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Archive formats; DEFAULT_FORMAT is the one used when none is requested.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000106
107#---------------------------------------------------------
108# tarfile constants
109#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Each value is the callable used to
# convert the field's text.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
139
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000140#---------------------------------------------------------
141# Bits used in the mode field, values in octal.
142#---------------------------------------------------------
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Read/write/execute bits for owner, group and other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
163
164#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000165# initialization
166#---------------------------------------------------------
# Module-wide default encoding: the file system encoding, or the
# interpreter's default encoding when the former is reported as None.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000170
171#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000172# Some useful functions
173#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000174
def stn(s, length):
    """Return s as a field of exactly length characters: longer
       strings are cut off, shorter ones are padded with NUL.
    """
    truncated = s[:length]
    return truncated.ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000179
def nts(s):
    """Return the part of a string field that precedes its first
       null character, or the whole field if it contains none.
    """
    head, _sep, _tail = s.partition("\0")
    return head
188
Georg Brandl38c6a222006-05-10 16:26:03 +0000189def nti(s):
190 """Convert a number field to a python number.
191 """
192 # There are two possible encodings for a number field, see
193 # itn() below.
194 if s[0] != chr(0200):
Georg Brandlded1c4d2006-12-20 11:55:16 +0000195 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000196 n = int(nts(s) or "0", 8)
Georg Brandlded1c4d2006-12-20 11:55:16 +0000197 except ValueError:
Lars Gustäbeldd866d52009-11-22 18:30:53 +0000198 raise InvalidHeaderError("invalid header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000199 else:
200 n = 0L
201 for i in xrange(len(s) - 1):
202 n <<= 8
203 n += ord(s[i + 1])
204 return n
205
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000206def itn(n, digits=8, format=DEFAULT_FORMAT):
Georg Brandl38c6a222006-05-10 16:26:03 +0000207 """Convert a python number to a number field.
208 """
209 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
210 # octal digits followed by a null-byte, this allows values up to
211 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
212 # that if necessary. A leading 0200 byte indicates this particular
213 # encoding, the following digits-1 bytes are a big-endian
214 # representation. This allows values up to (256**(digits-1))-1.
215 if 0 <= n < 8 ** (digits - 1):
216 s = "%0*o" % (digits - 1, n) + NUL
217 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000218 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
Georg Brandle4751e32006-05-18 06:11:19 +0000219 raise ValueError("overflow in number field")
Georg Brandl38c6a222006-05-10 16:26:03 +0000220
221 if n < 0:
222 # XXX We mimic GNU tar's behaviour with negative numbers,
223 # this could raise OverflowError.
224 n = struct.unpack("L", struct.pack("l", n))[0]
225
226 s = ""
227 for i in xrange(digits - 1):
228 s = chr(n & 0377) + s
229 n >>= 8
230 s = chr(0200) + s
231 return s
232
def uts(s, encoding, errors):
    """Encode the unicode object s with the given encoding and
       error handler and return the resulting string.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # "utf-8" is an extra error handler similar to the
    # -o invalid=UTF-8 option in POSIX.1-2001: characters that cannot
    # be translated are replaced with their UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # The string as a whole failed, so encode it character by character.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
252
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

       All characters of the 512 byte header are summed up, except for
       the chksum field which is counted as if it was filled with
       spaces. According to the GNU tar sources, some tars (Sun and
       NeXT) calculate chksum with signed char, which gives a different
       result if the buffer contains chars with the high bit set, so
       both variants are calculated.
    """
    header = buf[:512]
    # Bytes 148-155 are the chksum field: "8x" skips them, and the
    # constant 256 accounts for the eight spaces (8 * 32) in their place.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src ends before length bytes were copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(count):
        # Move exactly count bytes; a short read means src is exhausted.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)
    while full_chunks > 0:
        transfer(chunk_size)
        full_chunks -= 1
    if tail != 0:
        transfer(tail)
    return
290
# Lookup table for filemode(): one inner tuple per character of the
# resulting string. Each inner tuple lists (bits, character) candidates
# in order of precedence; filemode() uses the first candidate whose
# bits are all set in the mode.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000317
def filemode(mode):
    """Return a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" is used when none of the candidates for this position match.
        symbol = "-"
        for mask, flag in candidates:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000332
class TarError(Exception):
    """Base exception of the tarfile module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000366
367#---------------------------
368# internal stream interface
369#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open(). mode must be "r" or
           "w", any other value raises KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY only exists on some platforms; use it where available.
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of the string s to the file descriptor."""
        # os.write() returns the number of bytes it actually wrote, which
        # can be less than requested; keep going until nothing is left
        # instead of silently dropping the rest.
        while s:
            written = os.write(self.fd, s)
            s = s[written:]
393
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- name of the file; it is opened with _LowLevelFile
                       if fileobj is None
           mode     -- "r" for reading, anything else is treated as
                       writing when the compressor is set up
           comptype -- "tar", "gz", "bz2", or "*" to detect the
                       compression from the data
           fileobj  -- an existing stream-like object or None
           bufsize  -- number of bytes per access to fileobj

           Raises CompressionError if the module needed for comptype
           is not available.
        """
        # Remember whether fileobj belongs to the caller; only a file
        # opened here is closed again in close().
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""
        self.pos      = 0
        self.closed   = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("") & 0xffffffff
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # "closed" is missing if __init__() failed early.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: produce a raw deflate stream, the gzip header
        # and trailer are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffff
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Mark the stream closed before flushing, and release the file in
        # a finally clause: if a write fails, a file opened by this class
        # must still be closed and __del__() must not retry the flush.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = ""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Raises ReadError if the data does not start with the gzip
           magic and CompressionError for a compression method other
           than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: a two byte little-endian length and that many
            # bytes. It is part of the uncompressed gzip header, so it has
            # to be skipped with the raw __read(); read() would feed these
            # bytes to the decompressor and advance the stream position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # Skip the null-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Skip the null-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Skip the two byte header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Returns the new position; raises StreamError if pos lies
           before the current position.
        """
        if pos - self.pos >= 0:
            # Seeking forward means reading and discarding the data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           Raises ReadError if decompressing fails with an IOError.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # dbuf holds decompressed data that was not handed out yet.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except IOError:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
620
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Read the first block from fileobj and keep it for
           getcomptype() and the first read() call.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block read in advance, regardless of size."""
        # Later calls go straight to the wrapped file object: the
        # instance attribute set here shadows this method.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on how the data
           read in advance starts.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", followed by one character for
        # the block size ("1" to "9") and the block magic "1AY&SY". The
        # block size character must not be compared, otherwise only
        # streams written with block size 9 are recognized.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class _StreamProxy
644
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched per access to fileobj.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing."""
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """Reset the position and create a fresh (de)compressor. In
           read mode fileobj is rewound to its start as well.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = ""

    def read(self, size):
        """Return up to size bytes of decompressed data."""
        pieces = [self.buf]
        available = len(self.buf)
        # Decompress more blocks until enough data is there or the
        # file is exhausted.
        while available < size:
            compressed = self.fileobj.read(self.blocksize)
            if not compressed:
                break
            chunk = self.bz2obj.decompress(compressed)
            pieces.append(chunk)
            available += len(chunk)
        pending = "".join(pieces)

        # Hand out the requested part, keep the rest for later.
        result, self.buf = pending[:size], pending[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to position pos of the decompressed data."""
        # Going backwards is only possible by starting over.
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current position in the uncompressed data."""
        return self.pos

    def write(self, data):
        """Compress data and write the result to fileobj."""
        self.pos += len(data)
        self.fileobj.write(self.bz2obj.compress(data))

    def close(self):
        """Write out what is left in the compressor when in write
           mode. fileobj itself is not closed.
        """
        if self.mode == "w":
            self.fileobj.write(self.bz2obj.flush())
# class _BZ2Proxy
706
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707#------------------------
708# Extraction file object
709#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj is the wrapped file object, offset the position
           in it where the part starts and size the length of the
           part. sparse is None or an object whose find() method
           returns the section for a position.
        """
        self.fileobj, self.offset = fileobj, offset
        self.size, self.sparse = size, sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.
        """
        # Never read beyond the end of the part.
        remaining = self.size - self.position
        size = remaining if size is None else min(size, remaining)

        reader = self.readnormal if self.sparse is None else self.readsparse
        return reader(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        start = self.offset + self.position
        self.fileobj.seek(start)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = self.readsparsesection(remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        return "".join(chunks)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return ""

        # Stay within the section that contains the current position.
        section_end = section.offset + section.size
        size = min(size, section_end - self.position)

        if not isinstance(section, _data):
            # Sections that are not _data objects are returned as
            # null characters.
            self.position += size
            return NUL * size

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += size
        return self.fileobj.read(size)
#class _FileInFile
784
785
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes fetched at once while looking for a line end.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member tarinfo of the
           archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read from fileobj but not returned yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Use up the data left over from readline() first.
        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read blocks until one contains a newline or EOF is reached.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line. Every negative value
        # means "no limit"; using it as a slice index would cut the line
        # off from its end.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           whence is os.SEEK_SET, os.SEEK_CUR or os.SEEK_END, any other
           value raises ValueError. The resulting position is limited
           to the range from 0 to the size of the file.
           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
913
914#------------------
915# Exported Classes
916#------------------
917class TarInfo(object):
918 """Informational class which holds the details about an
919 archive member given by a tar header block.
920 TarInfo objects are returned by TarFile.getmember(),
921 TarFile.getmembers() and TarFile.gettarinfo() and are
922 usually created internally.
923 """
924
925 def __init__(self, name=""):
926 """Construct a TarInfo object. name is the optional name
927 of the member.
928 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000929 self.name = name # member name
930 self.mode = 0644 # file permissions
Georg Brandl38c6a222006-05-10 16:26:03 +0000931 self.uid = 0 # user id
932 self.gid = 0 # group id
933 self.size = 0 # file size
934 self.mtime = 0 # modification time
935 self.chksum = 0 # header checksum
936 self.type = REGTYPE # member type
937 self.linkname = "" # link name
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000938 self.uname = "root" # user name
939 self.gname = "root" # group name
Georg Brandl38c6a222006-05-10 16:26:03 +0000940 self.devmajor = 0 # device major number
941 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000942
Georg Brandl38c6a222006-05-10 16:26:03 +0000943 self.offset = 0 # the tar header starts here
944 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000945
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000946 self.pax_headers = {} # pax header information
947
948 # In pax headers the "name" and "linkname" field are called
949 # "path" and "linkpath".
950 def _getpath(self):
951 return self.name
952 def _setpath(self, name):
953 self.name = name
954 path = property(_getpath, _setpath)
955
956 def _getlinkpath(self):
957 return self.linkname
958 def _setlinkpath(self, linkname):
959 self.linkname = linkname
960 linkpath = property(_getlinkpath, _setlinkpath)
961
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000962 def __repr__(self):
963 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
964
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000965 def get_info(self, encoding, errors):
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000966 """Return the TarInfo's attributes as a dictionary.
967 """
968 info = {
Lars Gustäbelf7cda522009-08-28 19:23:44 +0000969 "name": self.name,
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000970 "mode": self.mode & 07777,
971 "uid": self.uid,
972 "gid": self.gid,
973 "size": self.size,
974 "mtime": self.mtime,
975 "chksum": self.chksum,
976 "type": self.type,
Lars Gustäbelf7cda522009-08-28 19:23:44 +0000977 "linkname": self.linkname,
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000978 "uname": self.uname,
979 "gname": self.gname,
980 "devmajor": self.devmajor,
981 "devminor": self.devminor
982 }
983
984 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
985 info["name"] += "/"
986
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000987 for key in ("name", "linkname", "uname", "gname"):
988 if type(info[key]) is unicode:
989 info[key] = info[key].encode(encoding, errors)
990
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000991 return info
992
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000993 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000994 """Return a tar header as a string of 512 byte blocks.
995 """
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000996 info = self.get_info(encoding, errors)
997
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000998 if format == USTAR_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000999 return self.create_ustar_header(info)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001000 elif format == GNU_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001001 return self.create_gnu_header(info)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001002 elif format == PAX_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001003 return self.create_pax_header(info, encoding, errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001004 else:
1005 raise ValueError("invalid format")
1006
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001007 def create_ustar_header(self, info):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001008 """Return the object as a ustar header block.
1009 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001010 info["magic"] = POSIX_MAGIC
1011
1012 if len(info["linkname"]) > LENGTH_LINK:
1013 raise ValueError("linkname is too long")
1014
1015 if len(info["name"]) > LENGTH_NAME:
1016 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1017
1018 return self._create_header(info, USTAR_FORMAT)
1019
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001020 def create_gnu_header(self, info):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001021 """Return the object as a GNU header block sequence.
1022 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001023 info["magic"] = GNU_MAGIC
1024
1025 buf = ""
1026 if len(info["linkname"]) > LENGTH_LINK:
1027 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1028
1029 if len(info["name"]) > LENGTH_NAME:
1030 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1031
1032 return buf + self._create_header(info, GNU_FORMAT)
1033
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001034 def create_pax_header(self, info, encoding, errors):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001035 """Return the object as a ustar header block. If it cannot be
1036 represented this way, prepend a pax extended header sequence
1037 with supplement information.
1038 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001039 info["magic"] = POSIX_MAGIC
1040 pax_headers = self.pax_headers.copy()
1041
1042 # Test string fields for values that exceed the field length or cannot
1043 # be represented in ASCII encoding.
1044 for name, hname, length in (
1045 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1046 ("uname", "uname", 32), ("gname", "gname", 32)):
1047
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001048 if hname in pax_headers:
1049 # The pax header has priority.
1050 continue
1051
1052 val = info[name].decode(encoding, errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001053
1054 # Try to encode the string as ASCII.
1055 try:
1056 val.encode("ascii")
1057 except UnicodeEncodeError:
1058 pax_headers[hname] = val
1059 continue
1060
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001061 if len(info[name]) > length:
1062 pax_headers[hname] = val
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001063
1064 # Test number fields for values that exceed the field limit or values
1065 # that like to be stored as float.
1066 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001067 if name in pax_headers:
1068 # The pax header has priority. Avoid overflow.
1069 info[name] = 0
1070 continue
1071
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001072 val = info[name]
1073 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1074 pax_headers[name] = unicode(val)
1075 info[name] = 0
1076
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001077 # Create a pax extended header if necessary.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001078 if pax_headers:
1079 buf = self._create_pax_generic_header(pax_headers)
1080 else:
1081 buf = ""
1082
1083 return buf + self._create_header(info, USTAR_FORMAT)
1084
1085 @classmethod
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001086 def create_pax_global_header(cls, pax_headers):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001087 """Return the object as a pax global header block sequence.
1088 """
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001089 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001090
1091 def _posix_split_name(self, name):
1092 """Split a name longer than 100 chars into a prefix
1093 and a name part.
1094 """
1095 prefix = name[:LENGTH_PREFIX + 1]
1096 while prefix and prefix[-1] != "/":
1097 prefix = prefix[:-1]
1098
1099 name = name[len(prefix):]
1100 prefix = prefix[:-1]
1101
1102 if not prefix or len(name) > LENGTH_NAME:
1103 raise ValueError("name is too long")
1104 return prefix, name
1105
1106 @staticmethod
1107 def _create_header(info, format):
1108 """Return a header block. info is a dictionary with file
1109 information, format must be one of the *_FORMAT constants.
1110 """
1111 parts = [
1112 stn(info.get("name", ""), 100),
1113 itn(info.get("mode", 0) & 07777, 8, format),
1114 itn(info.get("uid", 0), 8, format),
1115 itn(info.get("gid", 0), 8, format),
1116 itn(info.get("size", 0), 12, format),
1117 itn(info.get("mtime", 0), 12, format),
1118 " ", # checksum field
1119 info.get("type", REGTYPE),
1120 stn(info.get("linkname", ""), 100),
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001121 stn(info.get("magic", POSIX_MAGIC), 8),
1122 stn(info.get("uname", "root"), 32),
1123 stn(info.get("gname", "root"), 32),
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001124 itn(info.get("devmajor", 0), 8, format),
1125 itn(info.get("devminor", 0), 8, format),
1126 stn(info.get("prefix", ""), 155)
1127 ]
1128
1129 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1130 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1131 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1132 return buf
1133
1134 @staticmethod
1135 def _create_payload(payload):
1136 """Return the string payload filled with zero bytes
1137 up to the next 512 byte border.
1138 """
1139 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1140 if remainder > 0:
1141 payload += (BLOCKSIZE - remainder) * NUL
1142 return payload
1143
1144 @classmethod
1145 def _create_gnu_long_header(cls, name, type):
1146 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1147 for name.
1148 """
1149 name += NUL
1150
1151 info = {}
1152 info["name"] = "././@LongLink"
1153 info["type"] = type
1154 info["size"] = len(name)
1155 info["magic"] = GNU_MAGIC
1156
1157 # create extended header + name blocks.
1158 return cls._create_header(info, USTAR_FORMAT) + \
1159 cls._create_payload(name)
1160
1161 @classmethod
1162 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1163 """Return a POSIX.1-2001 extended or global header sequence
1164 that contains a list of keyword, value pairs. The values
1165 must be unicode objects.
1166 """
1167 records = []
1168 for keyword, value in pax_headers.iteritems():
1169 keyword = keyword.encode("utf8")
1170 value = value.encode("utf8")
1171 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1172 n = p = 0
1173 while True:
1174 n = l + len(str(p))
1175 if n == p:
1176 break
1177 p = n
1178 records.append("%d %s=%s\n" % (p, keyword, value))
1179 records = "".join(records)
1180
1181 # We use a hardcoded "././@PaxHeader" name like star does
1182 # instead of the one that POSIX recommends.
1183 info = {}
1184 info["name"] = "././@PaxHeader"
1185 info["type"] = type
1186 info["size"] = len(records)
1187 info["magic"] = POSIX_MAGIC
1188
1189 # Create pax header + record blocks.
1190 return cls._create_header(info, USTAR_FORMAT) + \
1191 cls._create_payload(records)
1192
Guido van Rossum75b64e62005-01-16 00:16:11 +00001193 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001194 def frombuf(cls, buf):
1195 """Construct a TarInfo object from a 512 byte string buffer.
1196 """
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001197 if len(buf) == 0:
1198 raise EmptyHeaderError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +00001199 if len(buf) != BLOCKSIZE:
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001200 raise TruncatedHeaderError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +00001201 if buf.count(NUL) == BLOCKSIZE:
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001202 raise EOFHeaderError("end of file header")
Georg Brandlebbeed72006-12-19 22:06:46 +00001203
Georg Brandlded1c4d2006-12-20 11:55:16 +00001204 chksum = nti(buf[148:156])
Georg Brandlebbeed72006-12-19 22:06:46 +00001205 if chksum not in calc_chksums(buf):
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001206 raise InvalidHeaderError("bad checksum")
Georg Brandl38c6a222006-05-10 16:26:03 +00001207
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001208 obj = cls()
1209 obj.buf = buf
1210 obj.name = nts(buf[0:100])
1211 obj.mode = nti(buf[100:108])
1212 obj.uid = nti(buf[108:116])
1213 obj.gid = nti(buf[116:124])
1214 obj.size = nti(buf[124:136])
1215 obj.mtime = nti(buf[136:148])
1216 obj.chksum = chksum
1217 obj.type = buf[156:157]
1218 obj.linkname = nts(buf[157:257])
1219 obj.uname = nts(buf[265:297])
1220 obj.gname = nts(buf[297:329])
1221 obj.devmajor = nti(buf[329:337])
1222 obj.devminor = nti(buf[337:345])
1223 prefix = nts(buf[345:500])
Georg Brandl3354f282006-10-29 09:16:12 +00001224
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001225 # Old V7 tar format represents a directory as a regular
1226 # file with a trailing slash.
1227 if obj.type == AREGTYPE and obj.name.endswith("/"):
1228 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001229
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001230 # Remove redundant slashes from directories.
1231 if obj.isdir():
1232 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001233
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001234 # Reconstruct a ustar longname.
1235 if prefix and obj.type not in GNU_TYPES:
1236 obj.name = prefix + "/" + obj.name
1237 return obj
1238
1239 @classmethod
1240 def fromtarfile(cls, tarfile):
1241 """Return the next TarInfo object from TarFile object
1242 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001243 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001244 buf = tarfile.fileobj.read(BLOCKSIZE)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001245 obj = cls.frombuf(buf)
1246 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1247 return obj._proc_member(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001248
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001249 #--------------------------------------------------------------------------
1250 # The following are methods that are called depending on the type of a
1251 # member. The entry point is _proc_member() which can be overridden in a
1252 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1253 # implement the following
1254 # operations:
1255 # 1. Set self.offset_data to the position where the data blocks begin,
1256 # if there is data that follows.
1257 # 2. Set tarfile.offset to the position where the next member's header will
1258 # begin.
1259 # 3. Return self or another valid TarInfo object.
1260 def _proc_member(self, tarfile):
1261 """Choose the right processing method depending on
1262 the type and call it.
Georg Brandl3354f282006-10-29 09:16:12 +00001263 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001264 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1265 return self._proc_gnulong(tarfile)
1266 elif self.type == GNUTYPE_SPARSE:
1267 return self._proc_sparse(tarfile)
1268 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1269 return self._proc_pax(tarfile)
1270 else:
1271 return self._proc_builtin(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001272
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001273 def _proc_builtin(self, tarfile):
1274 """Process a builtin type or an unknown type which
1275 will be treated as a regular file.
1276 """
1277 self.offset_data = tarfile.fileobj.tell()
1278 offset = self.offset_data
1279 if self.isreg() or self.type not in SUPPORTED_TYPES:
1280 # Skip the following data blocks.
1281 offset += self._block(self.size)
1282 tarfile.offset = offset
Georg Brandl3354f282006-10-29 09:16:12 +00001283
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001284 # Patch the TarInfo object with saved global
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001285 # header information.
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001286 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001287
1288 return self
1289
1290 def _proc_gnulong(self, tarfile):
1291 """Process the blocks that hold a GNU longname
1292 or longlink member.
1293 """
1294 buf = tarfile.fileobj.read(self._block(self.size))
1295
1296 # Fetch the next header and process it.
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001297 try:
1298 next = self.fromtarfile(tarfile)
1299 except HeaderError:
1300 raise SubsequentHeaderError("missing or bad subsequent header")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001301
1302 # Patch the TarInfo object from the next header with
1303 # the longname information.
1304 next.offset = self.offset
1305 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001306 next.name = nts(buf)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001307 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001308 next.linkname = nts(buf)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001309
1310 return next
1311
1312 def _proc_sparse(self, tarfile):
1313 """Process a GNU sparse header plus extra headers.
1314 """
1315 buf = self.buf
1316 sp = _ringbuffer()
1317 pos = 386
1318 lastpos = 0L
1319 realpos = 0L
1320 # There are 4 possible sparse structs in the
1321 # first header.
1322 for i in xrange(4):
1323 try:
1324 offset = nti(buf[pos:pos + 12])
1325 numbytes = nti(buf[pos + 12:pos + 24])
1326 except ValueError:
1327 break
1328 if offset > lastpos:
1329 sp.append(_hole(lastpos, offset - lastpos))
1330 sp.append(_data(offset, numbytes, realpos))
1331 realpos += numbytes
1332 lastpos = offset + numbytes
1333 pos += 24
1334
1335 isextended = ord(buf[482])
1336 origsize = nti(buf[483:495])
1337
1338 # If the isextended flag is given,
1339 # there are extra headers to process.
1340 while isextended == 1:
1341 buf = tarfile.fileobj.read(BLOCKSIZE)
1342 pos = 0
1343 for i in xrange(21):
1344 try:
1345 offset = nti(buf[pos:pos + 12])
1346 numbytes = nti(buf[pos + 12:pos + 24])
1347 except ValueError:
1348 break
1349 if offset > lastpos:
1350 sp.append(_hole(lastpos, offset - lastpos))
1351 sp.append(_data(offset, numbytes, realpos))
1352 realpos += numbytes
1353 lastpos = offset + numbytes
1354 pos += 24
1355 isextended = ord(buf[504])
1356
1357 if lastpos < origsize:
1358 sp.append(_hole(lastpos, origsize - lastpos))
1359
1360 self.sparse = sp
1361
1362 self.offset_data = tarfile.fileobj.tell()
1363 tarfile.offset = self.offset_data + self._block(self.size)
1364 self.size = origsize
1365
1366 return self
1367
1368 def _proc_pax(self, tarfile):
1369 """Process an extended or global header as described in
1370 POSIX.1-2001.
1371 """
1372 # Read the header information.
1373 buf = tarfile.fileobj.read(self._block(self.size))
1374
1375 # A pax header stores supplemental information for either
1376 # the following file (extended) or all following files
1377 # (global).
1378 if self.type == XGLTYPE:
1379 pax_headers = tarfile.pax_headers
1380 else:
1381 pax_headers = tarfile.pax_headers.copy()
1382
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001383 # Parse pax header information. A record looks like that:
1384 # "%d %s=%s\n" % (length, keyword, value). length is the size
1385 # of the complete record including the length field itself and
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001386 # the newline. keyword and value are both UTF-8 encoded strings.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001387 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1388 pos = 0
1389 while True:
1390 match = regex.match(buf, pos)
1391 if not match:
1392 break
1393
1394 length, keyword = match.groups()
1395 length = int(length)
1396 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1397
1398 keyword = keyword.decode("utf8")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001399 value = value.decode("utf8")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001400
1401 pax_headers[keyword] = value
1402 pos += length
1403
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001404 # Fetch the next header.
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001405 try:
1406 next = self.fromtarfile(tarfile)
1407 except HeaderError:
1408 raise SubsequentHeaderError("missing or bad subsequent header")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001409
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001410 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001411 # Patch the TarInfo object with the extended header info.
1412 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1413 next.offset = self.offset
1414
Brett Cannon132fc542008-08-04 21:23:07 +00001415 if "size" in pax_headers:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001416 # If the extended header replaces the size field,
1417 # we need to recalculate the offset where the next
1418 # header starts.
1419 offset = next.offset_data
1420 if next.isreg() or next.type not in SUPPORTED_TYPES:
1421 offset += next._block(next.size)
1422 tarfile.offset = offset
1423
1424 return next
1425
1426 def _apply_pax_info(self, pax_headers, encoding, errors):
1427 """Replace fields with supplemental information from a previous
1428 pax extended or global header.
1429 """
1430 for keyword, value in pax_headers.iteritems():
1431 if keyword not in PAX_FIELDS:
1432 continue
1433
1434 if keyword == "path":
1435 value = value.rstrip("/")
1436
1437 if keyword in PAX_NUMBER_FIELDS:
1438 try:
1439 value = PAX_NUMBER_FIELDS[keyword](value)
1440 except ValueError:
1441 value = 0
1442 else:
1443 value = uts(value, encoding, errors)
1444
1445 setattr(self, keyword, value)
1446
1447 self.pax_headers = pax_headers.copy()
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001448
1449 def _block(self, count):
1450 """Round up a byte count by BLOCKSIZE and return it,
1451 e.g. _block(834) => 1024.
1452 """
1453 blocks, remainder = divmod(count, BLOCKSIZE)
1454 if remainder:
1455 blocks += 1
1456 return blocks * BLOCKSIZE
Georg Brandl3354f282006-10-29 09:16:12 +00001457
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001458 def isreg(self):
1459 return self.type in REGULAR_TYPES
1460 def isfile(self):
1461 return self.isreg()
1462 def isdir(self):
1463 return self.type == DIRTYPE
1464 def issym(self):
1465 return self.type == SYMTYPE
1466 def islnk(self):
1467 return self.type == LNKTYPE
1468 def ischr(self):
1469 return self.type == CHRTYPE
1470 def isblk(self):
1471 return self.type == BLKTYPE
1472 def isfifo(self):
1473 return self.type == FIFOTYPE
1474 def issparse(self):
1475 return self.type == GNUTYPE_SPARSE
1476 def isdev(self):
1477 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1478# class TarInfo
1479
1480class TarFile(object):
1481 """The TarFile Class provides an interface to tar archives.
1482 """
1483
1484 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1485
1486 dereference = False # If true, add content of linked file to the
1487 # tar file, else the link.
1488
1489 ignore_zeros = False # If true, skips empty or invalid blocks and
1490 # continues processing.
1491
Lars Gustäbel92ca7562009-12-13 11:32:27 +00001492 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493 # messages (if debug >= 0). If > 0, errors
1494 # are passed to the caller as exceptions.
1495
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001496 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001498 encoding = ENCODING # Encoding for 8-bit character strings.
1499
1500 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001501
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001502 tarinfo = TarInfo # The default TarInfo class to use.
1503
1504 fileobject = ExFileObject # The default ExFileObject class to use.
1505
1506 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1507 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001508 errors=None, pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001509 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1510 read from an existing archive, 'a' to append data to an existing
1511 file or 'w' to create a new file overwriting an existing one. `mode'
1512 defaults to 'r'.
1513 If `fileobj' is given, it is used for reading or writing data. If it
1514 can be determined, `mode' is overridden by `fileobj's mode.
1515 `fileobj' is not closed, when TarFile is closed.
1516 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001517 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001518 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001519 self.mode = mode
1520 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001521
1522 if not fileobj:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001523 if self.mode == "a" and not os.path.exists(name):
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001524 # Create nonexistent files in append mode.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001525 self.mode = "w"
1526 self._mode = "wb"
Brett Cannon6cef0762007-05-25 20:17:15 +00001527 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001528 self._extfileobj = False
1529 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001530 if name is None and hasattr(fileobj, "name"):
1531 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001532 if hasattr(fileobj, "mode"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001533 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001534 self._extfileobj = True
Lars Gustäbel0f4a14b2007-08-28 12:31:09 +00001535 self.name = os.path.abspath(name) if name else None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001536 self.fileobj = fileobj
1537
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001538 # Init attributes.
1539 if format is not None:
1540 self.format = format
1541 if tarinfo is not None:
1542 self.tarinfo = tarinfo
1543 if dereference is not None:
1544 self.dereference = dereference
1545 if ignore_zeros is not None:
1546 self.ignore_zeros = ignore_zeros
1547 if encoding is not None:
1548 self.encoding = encoding
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001549
1550 if errors is not None:
1551 self.errors = errors
1552 elif mode == "r":
1553 self.errors = "utf-8"
1554 else:
1555 self.errors = "strict"
1556
1557 if pax_headers is not None and self.format == PAX_FORMAT:
1558 self.pax_headers = pax_headers
1559 else:
1560 self.pax_headers = {}
1561
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001562 if debug is not None:
1563 self.debug = debug
1564 if errorlevel is not None:
1565 self.errorlevel = errorlevel
1566
1567 # Init datastructures.
Georg Brandl38c6a222006-05-10 16:26:03 +00001568 self.closed = False
1569 self.members = [] # list of members as TarInfo objects
1570 self._loaded = False # flag if all members have been read
Lars Gustäbel77b2d632007-12-01 21:02:12 +00001571 self.offset = self.fileobj.tell()
1572 # current position in the archive file
Georg Brandl38c6a222006-05-10 16:26:03 +00001573 self.inodes = {} # dictionary caching the inodes of
1574 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001575
Lars Gustäbel355538e2009-11-18 20:24:54 +00001576 try:
1577 if self.mode == "r":
1578 self.firstmember = None
1579 self.firstmember = self.next()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001580
Lars Gustäbel355538e2009-11-18 20:24:54 +00001581 if self.mode == "a":
1582 # Move to the end of the archive,
1583 # before the first empty block.
Lars Gustäbel355538e2009-11-18 20:24:54 +00001584 while True:
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001585 self.fileobj.seek(self.offset)
1586 try:
1587 tarinfo = self.tarinfo.fromtarfile(self)
1588 self.members.append(tarinfo)
1589 except EOFHeaderError:
1590 self.fileobj.seek(self.offset)
Lars Gustäbel355538e2009-11-18 20:24:54 +00001591 break
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001592 except HeaderError, e:
1593 raise ReadError(str(e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001594
Lars Gustäbel355538e2009-11-18 20:24:54 +00001595 if self.mode in "aw":
1596 self._loaded = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001597
Lars Gustäbel355538e2009-11-18 20:24:54 +00001598 if self.pax_headers:
1599 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1600 self.fileobj.write(buf)
1601 self.offset += len(buf)
1602 except:
1603 if not self._extfileobj:
1604 self.fileobj.close()
1605 self.closed = True
1606 raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001607
def _getposix(self):
    """Tell whether the archive is written in ustar format."""
    return self.format == USTAR_FORMAT
def _setposix(self, value):
    """Deprecated setter: pick ustar for a true value, GNU otherwise.

    A DeprecationWarning attributed to the caller (stacklevel 2) is
    issued on every assignment.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT
# Legacy boolean view of the format attribute.
posix = property(_getposix, _setposix)
1619
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001620 #--------------------------------------------------------------------------
1621 # Below are the classmethods which act as alternate constructors to the
1622 # TarFile class. The open() method is the only one that is needed for
1623 # public use; it is the "super"-constructor and is able to select an
1624 # adequate "sub"-constructor for a particular compression using the mapping
1625 # from OPEN_METH.
1626 #
1627 # This concept allows one to subclass TarFile without losing the comfort of
1628 # the super-constructor. A sub-constructor is registered and made available
1629 # by adding it to the mapping in OPEN_METH.
1630
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or the
       mode cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no opener accepts the file.
       Extra keyword arguments are passed on to the opener.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # This opener does not fit; rewind a caller-supplied
                # file object and try the next one.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against a tuple: a substring test would also let
        # "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # The constructor treats the stream as an external file
            # object and will not close it on failure, so do it here
            # before re-raising.
            stream.close()
            raise
        # From here on the TarFile owns the stream and closes it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        # Tuple membership, so that "" and "aw" are rejected below
        # instead of being forwarded to taropen().
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001703
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading, appending or
       writing and return the new instance. Extra keyword arguments are
       handed to the class constructor. A `mode' other than 'r', 'a' or
       'w' raises ValueError.
    """
    if len(mode) <= 1 and mode in "raw":
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001711
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       If `fileobj' is None, the file `name' is opened here in binary
       mode. Raises ValueError for a `mode' other than 'r' or 'w',
       CompressionError if the gzip module is unusable and ReadError if
       opening the archive fails with an IOError.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the raw file belongs to us: a file object supplied
    # by the caller must stay open on failure (open() rewinds and reuses
    # it), but one opened here has to be closed again.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1737
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       A given `fileobj' is wrapped in a _BZ2Proxy, otherwise `name' is
       opened as a bz2.BZ2File. Raises ValueError for a `mode' other than
       'r' or 'w', CompressionError if the bz2 module cannot be imported
       and ReadError if opening the archive fails with an IOError or
       EOFError.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here is closed again on failure; a file
    # object supplied by the caller stays under the caller's control.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1762
# All *open() methods are registered here.
# The keys are the compression names that may follow ':' in a mode
# string, the values are the names of the classmethods that open() looks
# up with getattr() on the class.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1769
1770 #--------------------------------------------------------------------------
1771 # The public methods which TarFile provides:
1772
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up to
       a multiple of RECORDSIZE. Calling close() on an already closed
       TarFile does nothing.

       The object is marked as closed and a file object that is not owned
       by the caller is closed even if writing the trailing blocks fails;
       the error from the failed write is still propagated.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file even when the trailer could not be written.
        if not self._extfileobj:
            self.fileobj.close()
1792
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised when the archive has no such member. For a name
       that is stored several times, the last occurrence wins because it
       is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001803
def getmembers(self):
    """Return a list of TarInfo objects, one per archive member, in the
       order in which the members are stored in the archive.
    """
    self._check()
    # The member list is only complete once the whole archive has been
    # scanned, so finish the scan first if that has not happened yet.
    fully_scanned = self._loaded
    if not fully_scanned:
        self._load()
    return self.members
1813
def getnames(self):
    """Return the names of all archive members as a list, ordered like
       the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001819
1820 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1821 """Create a TarInfo object for either the file `name' or the file
1822 object `fileobj' (using os.fstat on its file descriptor). You can
1823 modify some of the TarInfo's attributes before you add it using
1824 addfile(). If given, `arcname' specifies an alternative name for the
1825 file in the archive.
1826 """
1827 self._check("aw")
1828
1829 # When fileobj is given, replace name by
1830 # fileobj's real name.
1831 if fileobj is not None:
1832 name = fileobj.name
1833
1834 # Building the name of the member in the archive.
1835 # Backward slashes are converted to forward slashes,
1836 # Absolute paths are turned to relative paths.
1837 if arcname is None:
1838 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001839 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001840 arcname = arcname.replace(os.sep, "/")
1841 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001842
1843 # Now, fill the TarInfo object with
1844 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001845 tarinfo = self.tarinfo()
1846 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001847
1848 # Use os.stat or os.lstat, depending on platform
1849 # and if symlinks shall be resolved.
1850 if fileobj is None:
1851 if hasattr(os, "lstat") and not self.dereference:
1852 statres = os.lstat(name)
1853 else:
1854 statres = os.stat(name)
1855 else:
1856 statres = os.fstat(fileobj.fileno())
1857 linkname = ""
1858
1859 stmd = statres.st_mode
1860 if stat.S_ISREG(stmd):
1861 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001862 if not self.dereference and statres.st_nlink > 1 and \
1863 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001864 # Is it a hardlink to an already
1865 # archived file?
1866 type = LNKTYPE
1867 linkname = self.inodes[inode]
1868 else:
1869 # The inode is added only if its valid.
1870 # For win32 it is always 0.
1871 type = REGTYPE
1872 if inode[0]:
1873 self.inodes[inode] = arcname
1874 elif stat.S_ISDIR(stmd):
1875 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001876 elif stat.S_ISFIFO(stmd):
1877 type = FIFOTYPE
1878 elif stat.S_ISLNK(stmd):
1879 type = SYMTYPE
1880 linkname = os.readlink(name)
1881 elif stat.S_ISCHR(stmd):
1882 type = CHRTYPE
1883 elif stat.S_ISBLK(stmd):
1884 type = BLKTYPE
1885 else:
1886 return None
1887
1888 # Fill the TarInfo object with all
1889 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001890 tarinfo.name = arcname
1891 tarinfo.mode = stmd
1892 tarinfo.uid = statres.st_uid
1893 tarinfo.gid = statres.st_gid
1894 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001895 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001896 else:
1897 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001898 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001899 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001900 tarinfo.linkname = linkname
1901 if pwd:
1902 try:
1903 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1904 except KeyError:
1905 pass
1906 if grp:
1907 try:
1908 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1909 except KeyError:
1910 pass
1911
1912 if type in (CHRTYPE, BLKTYPE):
1913 if hasattr(os, "major") and hasattr(os, "minor"):
1914 tarinfo.devmajor = os.major(statres.st_rdev)
1915 tarinfo.devminor = os.minor(statres.st_rdev)
1916 return tarinfo
1917
1918 def list(self, verbose=True):
1919 """Print a table of contents to sys.stdout. If `verbose' is False, only
1920 the names of the members are printed. If it is True, an `ls -l'-like
1921 output is produced.
1922 """
1923 self._check()
1924
1925 for tarinfo in self:
1926 if verbose:
1927 print filemode(tarinfo.mode),
1928 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1929 tarinfo.gname or tarinfo.gid),
1930 if tarinfo.ischr() or tarinfo.isblk():
1931 print "%10s" % ("%d,%d" \
1932 % (tarinfo.devmajor, tarinfo.devminor)),
1933 else:
1934 print "%10d" % tarinfo.size,
1935 print "%d-%02d-%02d %02d:%02d:%02d" \
1936 % time.localtime(tarinfo.mtime)[:6],
1937
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001938 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001939
1940 if verbose:
1941 if tarinfo.issym():
1942 print "->", tarinfo.linkname,
1943 if tarinfo.islnk():
1944 print "link to", tarinfo.linkname,
1945 print
1946
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded; it is deprecated and
       triggers a DeprecationWarning. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.

       Files that are excluded, the archive itself and files of an
       unsupported type are skipped silently (apart from debug output).
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing to the archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
2008
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by the TarInfo object `tarinfo' to the
       archive. When `fileobj' is given, tarinfo.size bytes are read from
       it and stored as the member's data. TarInfo objects can be created
       with gettarinfo(). On Windows platforms, `fileobj' should always
       be opened with mode 'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object is not stored in the
    # member list.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Copy the data, if any, and pad it to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        blocks, remainder = divmod(member.size, BLOCKSIZE)
        if remainder > 0:
            padding = BLOCKSIZE - remainder
            self.fileobj.write(NUL * padding)
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002034
def extractall(self, path=".", members=None):
    """Extract every member of the archive below the directory `path'
       (the current working directory by default). Owner, modification
       time and permissions of directories are only set after all
       members have been extracted. `members' is optional and must be a
       subset of the list returned by getmembers().
    """
    deferred_dirs = []

    if members is None:
        members = self

    for member in members:
        if member.isdir():
            # Remember the real attributes and extract the directory
            # itself with a safe mode for now.
            deferred_dirs.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        self.extract(member, path)

    # Handle the deepest directories first (reverse sort by name).
    deferred_dirs.sort(key=operator.attrgetter('name'))
    deferred_dirs.reverse()

    # Now apply the correct owner, mtime and filemode to the directories.
    for dirinfo in deferred_dirs:
        dirpath = os.path.join(path, dirinfo.name)
        try:
            self.chown(dirinfo, dirpath)
            self.utime(dirinfo, dirpath)
            self.chmod(dirinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2071
def extract(self, member, path=""):
    """Extract one member of the archive, by its full name, below the
       directory `path' (the current working directory by default). The
       file information is restored as accurately as possible. `member'
       is either a filename or a TarInfo object. Depending on
       self.errorlevel, errors are either raised or only reported as
       debug output.
    """
    self._check("r")

    if not isinstance(member, basestring):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # makelink() needs to know where the target of a hardlink lives.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2104
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' is
       either a filename or a TarInfo object. Regular files and members
       of an unknown type yield a file-like object for their data, links
       yield the file-like object of their target, everything else
       (directories, devices, ...) yields None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if not isinstance(member, basestring):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Members of an unknown type are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.).
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries to
    # extract a (sym)link as a file-object from a non-seekable stream
    # of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
2143
2144 def _extract_member(self, tarinfo, targetpath):
2145 """Extract the TarInfo object tarinfo to a physical
2146 file called targetpath.
2147 """
2148 # Fetch the TarInfo object for the given name
2149 # and build the destination pathname, replacing
2150 # forward slashes to platform specific separators.
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002151 targetpath = targetpath.rstrip("/")
2152 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002153
2154 # Create all upper directories.
2155 upperdirs = os.path.dirname(targetpath)
2156 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel0192e432008-02-05 11:51:40 +00002157 # Create directories that are not part of the archive with
2158 # default permissions.
Lars Gustäbeld2e22902007-01-23 11:17:33 +00002159 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002160
2161 if tarinfo.islnk() or tarinfo.issym():
2162 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2163 else:
2164 self._dbg(1, tarinfo.name)
2165
2166 if tarinfo.isreg():
2167 self.makefile(tarinfo, targetpath)
2168 elif tarinfo.isdir():
2169 self.makedir(tarinfo, targetpath)
2170 elif tarinfo.isfifo():
2171 self.makefifo(tarinfo, targetpath)
2172 elif tarinfo.ischr() or tarinfo.isblk():
2173 self.makedev(tarinfo, targetpath)
2174 elif tarinfo.islnk() or tarinfo.issym():
2175 self.makelink(tarinfo, targetpath)
2176 elif tarinfo.type not in SUPPORTED_TYPES:
2177 self.makeunknown(tarinfo, targetpath)
2178 else:
2179 self.makefile(tarinfo, targetpath)
2180
2181 self.chown(tarinfo, targetpath)
2182 if not tarinfo.issym():
2183 self.chmod(tarinfo, targetpath)
2184 self.utime(tarinfo, targetpath)
2185
2186 #--------------------------------------------------------------------------
2187 # Below are the different file methods. They are called via
2188 # _extract_member() when extract() is called. They can be replaced in a
2189 # subclass to implement other functionality.
2190
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that exists
       already is not an error.
    """
    try:
        # The directory gets a safe mode for now, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as e:
        if e.errno == errno.EEXIST:
            return
        raise
2201
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
       member `tarinfo'. Both the member's file object and the target
       file are closed again even if opening the target or copying the
       data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2210
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
       regular file and report this as debug output.
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file." % tarinfo.type)
    self._dbg(1, note)
2218
def makefifo(self, tarinfo, targetpath):
    """Create the fifo `targetpath'. ExtractError is raised on platforms
       without os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002226
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath'. ExtractError
       is raised on platforms without os.mknod or os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching device type flag.
    perm = tarinfo.mode
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, perm | devtype, device)
2241
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'. Where links cannot
       be created (platform limitation), a copy of the referenced file
       is made instead; IOError is raised if that fails as well.
    """
    try:
        if not tarinfo.issym():
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(tarinfo.linkname, targetpath)
    except AttributeError:
        # A symlink target is relative to the directory of the link.
        if tarinfo.issym():
            linkpath = os.path.dirname(tarinfo.name) + "/" + \
                       tarinfo.linkname
        else:
            linkpath = tarinfo.linkname

        try:
            # First choice: extract the referenced member once more.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the referenced file from disk.
            linkpath = linkpath.replace("/", os.sep)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002268
def chown(self, tarinfo, targetpath):
    """Give `targetpath' the owner and group recorded in `tarinfo'.
       Nothing is done unless the process runs with effective uid 0.
       ExtractError is raised if the ownership cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Prefer the symbolic names, fall back to the numeric ids and
    # finally to the ids of the current process.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002296
def chmod(self, tarinfo, targetpath):
    """Apply the permissions stored in `tarinfo' to `targetpath'.
       Nothing is done on platforms without os.chmod. ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002305
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to the mtime
       stored in `tarinfo'. Nothing is done on platforms without
       os.utime. ExtractError is raised if the times cannot be changed.
    """
    if hasattr(os, 'utime'):
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002315
2316 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member returned from here (except a pending `firstmember')
       is appended to self.members; when no further member is found,
       the archive is marked as completely loaded. ReadError is raised
       for an invalid, empty or truncated header at offset 0 and for
       any SubsequentHeaderError.
    """
    self._check("ra")
    # A member stored in `firstmember' is handed out exactly once,
    # before anything else is read.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    # The loop only repeats through the explicit `continue' statements
    # below, i.e. when a block is skipped because ignore_zeros is set.
    # Every other path falls through to the `break' at the bottom.
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError, e:
            # Without ignore_zeros this ends the iteration (tarinfo
            # stays None).
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError, e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # The very first header is invalid.
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError, e:
            # Only an error at the very beginning of the archive,
            # otherwise the iteration simply ends here.
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError, e:
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more could be read.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002362
2363 #--------------------------------------------------------------------------
2364 # Little helper methods:
2365
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002366 def _getmember(self, name, tarinfo=None):
2367 """Find an archive member by name from bottom to top.
2368 If tarinfo is given, it is used as the starting point.
2369 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002370 # Ensure that all members have been loaded.
2371 members = self.getmembers()
2372
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002373 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002374 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002375 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002376 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002377
2378 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002379 if name == members[i].name:
2380 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002381
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002382 def _load(self):
2383 """Read through the entire archive file and look for readable
2384 members.
2385 """
2386 while True:
2387 tarinfo = self.next()
2388 if tarinfo is None:
2389 break
2390 self._loaded = True
2391
2392 def _check(self, mode=None):
2393 """Check if TarFile is still open, and if the operation's mode
2394 corresponds to TarFile's mode.
2395 """
2396 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002397 raise IOError("%s is closed" % self.__class__.__name__)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002398 if mode is not None and self.mode not in mode:
2399 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002400
2401 def __iter__(self):
2402 """Provide an iterator object.
2403 """
2404 if self._loaded:
2405 return iter(self.members)
2406 else:
2407 return TarIter(self)
2408
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002409 def _dbg(self, level, msg):
2410 """Write debugging output to sys.stderr.
2411 """
2412 if level <= self.debug:
2413 print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002414
2415 def __enter__(self):
2416 self._check()
2417 return self
2418
2419 def __exit__(self, type, value, traceback):
2420 if type is None:
2421 self.close()
2422 else:
2423 # An exception occurred. We must not call close() because
2424 # it would try to write end-of-archive blocks and padding.
2425 if not self._extfileobj:
2426 self.fileobj.close()
2427 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002428# class TarFile
2429
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create a TarIter for the given TarFile.
        """
        self.tarfile = tarfile
        # Number of members handed out so far.
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, raise StopIteration when there is
           none left. As long as the TarFile is not marked as _loaded
           its next() method is used; once it runs dry the TarFile is
           marked as _loaded.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: getmembers() may be called while
            # the iteration is still running. In that case the
            # remaining members are served from the member list.
            if self.index >= len(archive.members):
                raise StopIteration
            entry = archive.members[self.index]
        else:
            entry = archive.next()
            if not entry:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return entry
2465
2466# Helper classes for sparse file support
2467class _section:
2468 """Base class for _data and _hole.
2469 """
2470 def __init__(self, offset, size):
2471 self.offset = offset
2472 self.size = size
2473 def __contains__(self, offset):
2474 return self.offset <= offset < self.offset + self.size
2475
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it stores the realpos value it is given.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2482
class _hole(_section):
    """A section of a sparse file that is a hole. Adds nothing to
       what _section already provides.
    """
2487
2488class _ringbuffer(list):
2489 """Ringbuffer class which increases performance
2490 over a regular list.
2491 """
2492 def __init__(self):
2493 self.idx = 0
2494 def find(self, offset):
2495 idx = self.idx
2496 while True:
2497 item = self[idx]
2498 if offset in item:
2499 break
2500 idx += 1
2501 if idx == len(self):
2502 idx = 0
2503 if idx == self.idx:
2504 # End of File
2505 return None
2506 self.idx = idx
2507 return item
2508
2509#---------------------------------------------
2510# zipfile compatible TarFile class
2511#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED


class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       ZipFile class from the standard module zipfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED). Any other compression
           value raises ValueError. If mode starts with "r", every
           member is given the attributes filename, file_size and
           date_time.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                 stacklevel=2)

        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to the list() method of the wrapped TarFile.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return None

    def getinfo(self, name):
        """Return what getmember(name) of the wrapped TarFile returns.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname. compress_type is
           accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive. The member's name is
           taken from zinfo.filename and its mtime is computed from
           zinfo.date_time.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar

        member = TarInfo(zinfo.filename)
        member.size = len(bytes)
        member.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(member, StringIO(bytes))

    def close(self):
        """Close the wrapped TarFile.
        """
        self.tarfile.close()
2561#class TarFileCompat
2562
2563#--------------------
2564# exported functions
2565#--------------------
def is_tarfile(name):
    """Return True if name can be opened as a tar archive that this
       module is able to handle, False if opening it raises TarError.
    """
    try:
        # open is resolved at call time as a module-level name.
        open(name).close()
    except TarError:
        return False
    return True
2576
# Keep the object that `open' was bound to so far reachable as
# bltn_open (presumably the builtin open -- confirm that nothing
# earlier in the module rebinds it), then make the module-level
# name `open' refer to TarFile.open. The order of the two
# statements matters.
bltn_open = open
open = TarFile.open