blob: db5ff7f9a4c148ac590a9b7e373e2b83fd82d36d [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001# -*- coding: iso-8859-1 -*-
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Senthil Kumaran4af1c6a2011-07-28 22:30:27 +080032__version__ = "$Revision: 85213 $"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000033# $Source$
34
Lars Gustäbelc64e4022007-03-13 10:47:19 +000035version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000036__author__ = "Lars Gustäbel (lars@gustaebel.de)"
37__date__ = "$Date$"
38__cvsid__ = "$Id$"
39__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
40
41#---------
42# Imports
43#---------
44import sys
45import os
46import shutil
47import stat
48import errno
49import time
50import struct
Georg Brandl3354f282006-10-29 09:16:12 +000051import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000052import re
Brett Cannon132fc542008-08-04 21:23:07 +000053import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
55try:
56 import grp, pwd
57except ImportError:
58 grp = pwd = None
59
60# from tarfile import *
61__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
62
63#---------------------------------------------------------
64# tar constants
65#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic strings span the 6-byte "magic" and the 2-byte "version"
# field of the header, so each of them must be exactly 8 bytes long.
# The GNU variant is "ustar" followed by TWO spaces and a NUL.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000098
99#---------------------------------------------------------
100# tarfile constants
101#---------------------------------------------------------
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# File types that tarfile supports: the standard member types
# followed by the GNU extension types.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
) + GNU_TYPES

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields in a pax header that are numbers, mapped to the type used to
# convert them. All other fields are treated as strings.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
131
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000132#---------------------------------------------------------
133# Bits used in the mode field, values in octal.
134#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

# Special permission bits.
TSUID = 0o4000            # set UID on execution
TSGID = 0o2000            # set GID on execution
TSVTX = 0o1000            # reserved

# Permission bits for owner, group and others.
TUREAD = 0o400            # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC = 0o100            # execute/search by owner
TGREAD = 0o040            # read by group
TGWRITE = 0o020           # write by group
TGEXEC = 0o010            # execute/search by group
TOREAD = 0o004            # read by other
TOWRITE = 0o002           # write by other
TOEXEC = 0o001            # execute/search by other

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Prefer the file system encoding; fall back to the interpreter's
# default encoding when the platform does not report one.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000162
163#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164# Some useful functions
165#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000166
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    The string is cut off after length characters and, when it is
    shorter than that, filled up with NUL characters on the right.
    """
    return s[:length].ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000171
def nts(s):
    """Convert a null-terminated string field to a python string.

    Everything from the first null char on is dropped; a field
    without a null char is returned unchanged.
    """
    head, _, _ = s.partition("\0")
    return head
180
def nti(s):
    """Convert a number field to a python number.

    Raises InvalidHeaderError if the field is not valid octal.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] == chr(0o200):
        # GNU encoding: the bytes after the marker are a big-endian
        # base-256 number.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # POSIX encoding: octal digits up to the first null char, an
    # empty field counts as zero.
    try:
        return int(nts(s) or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
197
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    Raises ValueError if n does not fit into a field of the given
    width in the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, then flip them
    # into big-endian order.
    payload = []
    for _ in range(width):
        payload.append(chr(n & 0o377))
        n >>= 8
    payload.reverse()
    return chr(0o200) + "".join(payload)
224
def uts(s, encoding, errors):
    """Convert a unicode object to a string.

    errors is passed on to the codec, except for the special value
    "utf-8" which is handled here.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001. Replace untranslatable characters with their
    # UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # Encode character by character so that only the offending
    # characters fall back to UTF-8.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
244
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    header = buf[:512]
    # "8x" skips the 8 bytes of the chksum field (offsets 148-155);
    # the constant 256 accounts for them as 8 spaces (8 * 0x20).
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000257
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes, a short read means premature EOF.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunk = 16 * 1024
    full_chunks, tail = divmod(length, chunk)

    copied = 0
    while copied < full_chunks:
        transfer(chunk)
        copied += 1

    if tail != 0:
        transfer(tail)
    return
282
# Lookup table for filemode(): one inner tuple per character of the
# result. Within an inner tuple the first (bits, char) pair whose bits
# are all set in the mode decides the character.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # others
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def pick(candidates):
        # First entry with all of its bits set wins, "-" if none does.
        for bits, char in candidates:
            if mode & bits == bits:
                return char
        return "-"

    return "".join([pick(candidates) for candidates in filemode_table])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000324
class TarError(Exception):
    """Base exception of this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358
359#---------------------------
360# internal stream interface
361#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open(). mode must be "r"
           (read-only) or "w" (write-only, create and truncate),
           any other value raises KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            # Only defined on platforms that distinguish text from
            # binary mode.
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor.
        """
        os.close(self.fd)

    def read(self, size):
        """Return at most size bytes read from the descriptor.
        """
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of s to the descriptor.
        """
        # os.write() returns the number of bytes actually written,
        # which may be less than len(s). Keep writing the rest
        # instead of silently dropping it.
        while s:
            written = os.write(self.fd, s)
            s = s[written:]
385
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- name of the file, it is opened with
                       _LowLevelFile if fileobj is None
           mode     -- "r" for reading, anything else is treated
                       as writing
           comptype -- "tar", "gz" or "bz2"; "*" detects the
                       compression from the first block of data
           fileobj  -- stream-like object or None
           bufsize  -- size of the blocks used to access fileobj

           Raises CompressionError if the required compression
           module is not available, or ReadError if a gzip stream
           does not start with a valid header.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("") & 0xffffffff
                # Exception type that signals corrupt data in _read().
                self.exception = zlib.error
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                self.exception = IOError
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Do not leak a file that was opened here, and keep
            # __del__() from touching the half-initialized stream.
            # The exception is always re-raised.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        # gzip header: magic, deflate method, FNAME flag, mtime,
        # extra flags, OS code. The original file name follows.
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if isinstance(self.name, unicode):
            self.name = self.name.encode("iso-8859-1", "replace")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffff
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Mark the stream closed first, so that a failure below does
        # not make __del__() run the whole procedure a second time.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = ""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            # A file that was opened by this object is closed even
            # if flushing or writing failed.
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        # Skip mtime, extra flags and OS code.
        self.__read(6)

        if flag & 4:
            # Extra field: two byte little-endian length plus data.
            # It belongs to the header, so it has to be skipped with
            # the raw __read(). read() would push these bytes through
            # the decompressor and advance the stream position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # Original file name, terminated by a null char.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Comment, terminated by a null char.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Header crc.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seeking forward means reading and discarding the data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                # zlib.error for gz, IOError for bz2.
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
614
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Read the first block from fileobj and keep it for the
           compression detection.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead by the constructor,
           regardless of size.
        """
        # Only the first call ends up here: from now on the instance
        # attribute shadows this method and reads go straight to the
        # wrapped file object.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes
           at the start of the stream.
        """
        head = self.buf
        if head[:3] == "\037\213\010":
            return "gz"
        looks_like_bz2 = head[0:3] == "BZh" and head[4:10] == "1AY&SY"
        if looks_like_bz2:
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class StreamProxy
638
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched from fileobj per read.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start the proxy at position 0 with a fresh
           (de)compressor object.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = ""

    def read(self, size):
        """Return up to size bytes of decompressed data.
        """
        chunks = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            data = self.bz2obj.decompress(raw)
            chunks.append(data)
            available += len(data)

        # Hand out the requested part, keep the surplus for later.
        joined = "".join(chunks)
        result = joined[:size]
        self.buf = joined[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to position pos of the decompressed data.
        """
        # Going backwards requires decompressing from the start again.
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current position in the uncompressed data.
        """
        return self.pos

    def write(self, data):
        """Compress data and write the result to fileobj.
        """
        self.pos += len(data)
        compressed = self.bz2obj.compress(data)
        self.fileobj.write(compressed)

    def close(self):
        """Flush the compressor in write mode. fileobj itself is
           not closed.
        """
        if self.mode == "w":
            remaining = self.bz2obj.flush()
            self.fileobj.write(remaining)
# class _BZ2Proxy
700
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000701#------------------------
702# Extraction file object
703#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj      # the wrapped file object
        self.offset = offset        # where the data starts in fileobj
        self.size = size            # length of the data
        self.sparse = sparse        # section map of a sparse file or None
        self.position = 0           # current position within the data

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.
        """
        # Never hand out more than what is left of this file.
        remaining = self.size - self.position
        if size is None:
            size = remaining
        else:
            size = min(size, remaining)

        reader = self.readnormal if self.sparse is None else self.readsparse
        return reader(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        start = self.offset + self.position
        self.fileobj.seek(start)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        parts = []
        while size > 0:
            part = self.readsparsesection(size)
            if not part:
                break
            parts.append(part)
            size -= len(part)
        return "".join(parts)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return ""

        # Do not read beyond the end of the current section.
        section_end = section.offset + section.size
        size = min(size, section_end - self.position)

        if not isinstance(section, _data):
            # Not a data section: nothing is stored for it in the
            # archive, produce null chars instead.
            self.position += size
            return NUL * size

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += size
        return self.fileobj.read(size)
#class _FileInFile
778
779
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() fetches at a time while it is
    # looking for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Set up reading of the member tarinfo from the file object
           of tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() read ahead but did not return yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if size is not None and size < 0:
            # Same meaning as for regular file objects. Without this,
            # a negative size would be used as a slice index and as a
            # read length on the underlying file object.
            size = None

        buf = ""
        if self.buffer:
            # Serve data left over from readline() first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read ahead blockwise until a newline or EOF shows up.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the length of the result.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position
           is clamped to the range 0..size.

           Raises ValueError if the file is closed or whence is
           not one of os.SEEK_SET, os.SEEK_CUR and os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data does not belong to the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
907
908#------------------
909# Exported Classes
910#------------------
911class TarInfo(object):
912 """Informational class which holds the details about an
913 archive member given by a tar header block.
914 TarInfo objects are returned by TarFile.getmember(),
915 TarFile.getmembers() and TarFile.gettarinfo() and are
916 usually created internally.
917 """
918
def __init__(self, name=""):
    """Construct a TarInfo object. name is the optional name
       of the member.

       Every other attribute starts out with a neutral default.
    """
    self.name = name            # member name
    self.mode = 0o644           # file permissions
    self.type = REGTYPE         # member type

    # user id / group id
    self.uid = self.gid = 0
    # file size, modification time, header checksum
    self.size = self.mtime = self.chksum = 0
    # link name, user name, group name
    self.linkname = self.uname = self.gname = ""
    # device major / minor number
    self.devmajor = self.devminor = 0

    # where the tar header starts / where the file's data starts
    self.offset = self.offset_data = 0

    self.pax_headers = {}       # pax header information
941
# pax headers call the "name" and "linkname" fields "path" and
# "linkpath". Both spellings are exposed; the properties below simply
# read and write the classic attributes.
def _getpath(self):
    """Return self.name."""
    return self.name
def _setpath(self, value):
    """Store value in self.name."""
    self.name = value
path = property(_getpath, _setpath)

def _getlinkpath(self):
    """Return self.linkname."""
    return self.linkname
def _setlinkpath(self, value):
    """Store value in self.linkname."""
    self.linkname = value
linkpath = property(_getlinkpath, _setlinkpath)
955
def __repr__(self):
    """Return "<ClassName 'member name' at 0x...>".
    """
    cls_name = self.__class__.__name__
    return "<%s %r at %#x>" % (cls_name, self.name, id(self))
958
def get_info(self, encoding, errors):
    """Return the TarInfo's attributes as a dictionary.

       The mode is masked to its lowest 12 bits, directory names get
       a trailing slash, and unicode values of the string fields are
       encoded with encoding and errors.
    """
    name = self.name
    if self.type == DIRTYPE and not name.endswith("/"):
        name += "/"

    info = dict(
        name=name,
        mode=self.mode & 0o7777,
        uid=self.uid,
        gid=self.gid,
        size=self.size,
        mtime=self.mtime,
        chksum=self.chksum,
        type=self.type,
        linkname=self.linkname,
        uname=self.uname,
        gname=self.gname,
        devmajor=self.devmajor,
        devminor=self.devminor,
    )

    # Only exact unicode objects are encoded; everything else is
    # passed through untouched.
    for key in ("name", "linkname", "uname", "gname"):
        text = info[key]
        if type(text) is unicode:
            info[key] = text.encode(encoding, errors)

    return info
986
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
    """Return a tar header as a string of 512 byte blocks.

       format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
       PAX_FORMAT); any other value raises ValueError.
    """
    # The attribute dictionary is built before the format is checked.
    info = self.get_info(encoding, errors)

    if format == USTAR_FORMAT:
        header = self.create_ustar_header(info)
    elif format == GNU_FORMAT:
        header = self.create_gnu_header(info)
    elif format == PAX_FORMAT:
        header = self.create_pax_header(info, encoding, errors)
    else:
        raise ValueError("invalid format")
    return header
1000
def create_ustar_header(self, info):
    """Return the object as a ustar header block.

       info is modified in place. Raises ValueError if the linkname
       exceeds LENGTH_LINK or the name cannot be split into a prefix
       and a name part.
    """
    info["magic"] = POSIX_MAGIC

    linkname = info["linkname"]
    if len(linkname) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    name = info["name"]
    if len(name) > LENGTH_NAME:
        # Too long for the name field alone: spread it over the
        # prefix and name fields.
        prefix, name = self._posix_split_name(name)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT)
1013
def create_gnu_header(self, info):
    """Return the object as a GNU header block sequence.

       A long-link and/or long-name header sequence is emitted in
       front of the regular header when the linkname or name exceeds
       LENGTH_LINK or LENGTH_NAME.
    """
    info["magic"] = GNU_MAGIC

    pieces = []
    linkname = info["linkname"]
    if len(linkname) > LENGTH_LINK:
        pieces.append(self._create_gnu_long_header(linkname, GNUTYPE_LONGLINK))

    name = info["name"]
    if len(name) > LENGTH_NAME:
        pieces.append(self._create_gnu_long_header(name, GNUTYPE_LONGNAME))

    pieces.append(self._create_header(info, GNU_FORMAT))
    return "".join(pieces)
1027
def create_pax_header(self, info, encoding, errors):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplement information.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header when they are too long for
    # their ustar field or contain non-ASCII characters.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, keyword, limit in string_fields:
        if keyword in pax_headers:
            # The pax header has priority.
            continue

        raw = info[field]
        text = raw.decode(encoding, errors)

        try:
            text.encode("ascii")
        except UnicodeEncodeError:
            is_ascii = False
        else:
            is_ascii = True

        if not is_ascii or len(raw) > limit:
            pax_headers[keyword] = text

    # Number fields go into the pax header when they do not fit into
    # the octal field or are floats.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for field, digits in number_fields:
        if field in pax_headers:
            # The pax header has priority. Avoid overflow.
            info[field] = 0
            continue

        number = info[field]
        if not 0 <= number < 8 ** (digits - 1) or isinstance(number, float):
            pax_headers[field] = unicode(number)
            info[field] = 0

    # Create a pax extended header if necessary.
    extended = self._create_pax_generic_header(pax_headers) if pax_headers else ""

    return extended + self._create_header(info, USTAR_FORMAT)
1078
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return the object as a pax global header block sequence.

       pax_headers is passed on to _create_pax_generic_header() with
       the global header type XGLTYPE.
    """
    global_header = cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
    return global_header
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001084
def _posix_split_name(self, name):
    """Split a name longer than 100 chars into a prefix
       and a name part.

       The split happens at the last "/" found within the first
       LENGTH_PREFIX + 1 characters. Raises ValueError if there is no
       such slash or the remaining name part is still longer than
       LENGTH_NAME.
    """
    head = name[:LENGTH_PREFIX + 1]
    # rfind() returns -1 without a slash, which makes the slice empty.
    cut = head.rfind("/") + 1

    # The separating slash itself belongs to neither part.
    prefix = head[:cut][:-1]
    name = name[cut:]

    if not prefix or len(name) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, name
1099
@staticmethod
def _create_header(info, format):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       Missing keys in info fall back to neutral defaults. The fields
       are concatenated in header order and padded to BLOCKSIZE; the
       checksum is computed over the block and then spliced into the
       checksum field.
    """
    parts = [
        stn(info.get("name", ""), 100),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum field placeholder. It has to be exactly 8 bytes so
        # that it occupies bytes 148-155 and the type flag lands at
        # byte 156, which is where frombuf() reads them back. Written
        # as a repetition so that the width is explicit.
        " " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100),
        stn(info.get("magic", POSIX_MAGIC), 8),
        stn(info.get("uname", ""), 32),
        stn(info.get("gname", ""), 32),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155)
    ]

    buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first 7 bytes of the checksum field (six octal
    # digits plus NUL); the 8th byte keeps the placeholder's space.
    buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
    return buf
1127
@staticmethod
def _create_payload(payload):
    """Return the string payload filled with zero bytes
       up to the next 512 byte border.

       A payload whose length is already a multiple of BLOCKSIZE is
       returned unchanged.
    """
    overhang = len(payload) % BLOCKSIZE
    if overhang > 0:
        payload += (BLOCKSIZE - overhang) * NUL
    return payload
1137
@classmethod
def _create_gnu_long_header(cls, name, type):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       for name.

       The sequence is a header block named "././@LongLink" followed
       by the NUL-terminated name, padded to full blocks.
    """
    payload = name + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(payload),
        "magic": GNU_MAGIC,
    }

    # create extended header + name blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(payload)
1154
@classmethod
def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
    """Return a POSIX.1-2001 extended or global header sequence
       that contains a list of keyword, value pairs. The values
       must be unicode objects.
    """
    chunks = []
    for key, val in pax_headers.iteritems():
        key = key.encode("utf8")
        val = val.encode("utf8")

        # Everything except the length prefix: both strings plus
        # ' ' + '=' + '\n'.
        base = len(key) + len(val) + 3

        # The length prefix counts its own digits as well, so repeat
        # until the total no longer changes.
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate

        chunks.append("%d %s=%s\n" % (total, key, val))
    records = "".join(chunks)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Create pax header + record blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(records)
1186
@classmethod
def frombuf(cls, buf):
    """Construct a TarInfo object from a 512 byte string buffer.

       Raises EmptyHeaderError for an empty buffer,
       TruncatedHeaderError for a buffer that is not BLOCKSIZE bytes
       long, EOFHeaderError for a block of NUL bytes only and
       InvalidHeaderError if the stored checksum does not match.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    # Decode the fixed-position header fields.
    obj = cls()
    obj.buf = buf
    obj.name = nts(buf[0:100])
    obj.mode = nti(buf[100:108])
    obj.uid = nti(buf[108:116])
    obj.gid = nti(buf[116:124])
    obj.size = nti(buf[124:136])
    obj.mtime = nti(buf[136:148])
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = nts(buf[157:257])
    obj.uname = nts(buf[265:297])
    obj.gname = nts(buf[297:329])
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500])

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    is_v7_dir = obj.type == AREGTYPE and obj.name.endswith("/")
    if is_v7_dir:
        obj.type = DIRTYPE

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1232
@classmethod
def fromtarfile(cls, tarfile):
    """Return the next TarInfo object from TarFile object
       tarfile.

       One block is read from tarfile.fileobj and parsed with
       frombuf(); the result is then handed to _proc_member().
    """
    fileobj = tarfile.fileobj
    member = cls.frombuf(fileobj.read(BLOCKSIZE))
    # The header block has just been consumed, so it starts one
    # block before the current position.
    member.offset = fileobj.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001242
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001243 #--------------------------------------------------------------------------
1244 # The following are methods that are called depending on the type of a
1245 # member. The entry point is _proc_member() which can be overridden in a
1246 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1247 # implement the following
1248 # operations:
1249 # 1. Set self.offset_data to the position where the data blocks begin,
1250 # if there is data that follows.
1251 # 2. Set tarfile.offset to the position where the next member's header will
1252 # begin.
1253 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Choose the right processing method depending on
       the type and call it.
    """
    member_type = self.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if member_type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if member_type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    # Everything else is handled as a builtin type.
    return self._proc_builtin(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001266
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
       will be treated as a regular file.

       Sets self.offset_data and tarfile.offset, applies the pax
       headers stored on tarfile and returns self.
    """
    self.offset_data = tarfile.fileobj.tell()

    next_header = self.offset_data
    has_data = self.isreg() or self.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        next_header += self._block(self.size)
    tarfile.offset = next_header

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1283
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.

       Returns the TarInfo object of the header that follows, with
       its name or linkname replaced by the long value. Raises
       SubsequentHeaderError if that header cannot be read.
    """
    payload = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(payload)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(payload)

    return member
1305
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers.

       Builds the list of data and hole sections and stores it in
       self.sparse, sets self.offset_data and tarfile.offset, replaces
       self.size with the original size read from the header and
       returns self.
    """
    # self.buf is the raw header block stored by frombuf().
    buf = self.buf
    sp = _ringbuffer()
    pos = 386
    # lastpos: end of the last section seen so far (offset + numbytes).
    # realpos: running total of numbytes, passed to _data() as the
    # third argument.
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        try:
            # Each struct is 24 bytes: two 12-byte number fields.
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            break
        # A gap between the previous section and this one is a hole.
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = tarfile.fileobj.read(BLOCKSIZE)
        pos = 0
        # An extension block is scanned for up to 21 structs, followed
        # by its own isextended flag at byte 504.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # Anything between the last section and the original size is a
    # trailing hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    self.sparse = sp

    self.offset_data = tarfile.fileobj.tell()
    # self.size still holds the size from the header here; it is used
    # to find the next header before being replaced by origsize.
    tarfile.offset = self.offset_data + self._block(self.size)
    self.size = origsize

    return self
1361
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2001.

       The records are parsed into a dictionary: tarfile.pax_headers
       itself for a global header, a copy of it for an extended
       header. The TarInfo object of the header that follows is
       returned; for an extended header it is patched with the parsed
       values first.

       Raises InvalidHeaderError for a record that claims a length of
       zero and SubsequentHeaderError if the following header cannot
       be read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like this:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero length would leave pos unchanged and match the
            # same record forever, so treat it as a corrupt header.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1419
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Replace fields with supplemental information from a previous
       pax extended or global header.

       Only keywords listed in PAX_FIELDS are applied. A copy of
       pax_headers is stored in self.pax_headers afterwards.
    """
    for keyword, value in pax_headers.iteritems():
        if keyword not in PAX_FIELDS:
            continue

        if keyword == "path":
            value = value.rstrip("/")

        if keyword not in PAX_NUMBER_FIELDS:
            value = uts(value, encoding, errors)
        else:
            # Values that cannot be converted fall back to 0.
            try:
                value = PAX_NUMBER_FIELDS[keyword](value)
            except ValueError:
                value = 0

        setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001442
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
       e.g. _block(834) => 1024.
    """
    full, partial = divmod(count, BLOCKSIZE)
    # A partially filled block still occupies a whole block.
    return (full + 1 if partial else full) * BLOCKSIZE
Georg Brandl3354f282006-10-29 09:16:12 +00001451
def isreg(self):
    """Return True if the type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES

def isfile(self):
    """Return the result of isreg()."""
    return self.isreg()

def isdir(self):
    """Return True if the type is DIRTYPE."""
    return self.type == DIRTYPE

def issym(self):
    """Return True if the type is SYMTYPE."""
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the type is LNKTYPE."""
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the type is CHRTYPE."""
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the type is BLKTYPE."""
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the type is FIFOTYPE."""
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if the type is GNUTYPE_SPARSE."""
    return self.type == GNUTYPE_SPARSE

def isdev(self):
    """Return True if the type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1472# class TarInfo
1473
1474class TarFile(object):
1475 """The TarFile Class provides an interface to tar archives.
1476 """
1477
1478 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1479
1480 dereference = False # If true, add content of linked file to the
1481 # tar file, else the link.
1482
1483 ignore_zeros = False # If true, skips empty or invalid blocks and
1484 # continues processing.
1485
Lars Gustäbel92ca7562009-12-13 11:32:27 +00001486 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001487 # messages (if debug >= 0). If > 0, errors
1488 # are passed to the caller as exceptions.
1489
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001490 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001491
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001492 encoding = ENCODING # Encoding for 8-bit character strings.
1493
1494 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001495
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001496 tarinfo = TarInfo # The default TarInfo class to use.
1497
1498 fileobject = ExFileObject # The default ExFileObject class to use.
1499
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.
       The remaining arguments override the class attributes of the same
       name unless they are None.
    """
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if fileobj:
        # Use the caller's file object; pick up its name and mode if
        # it provides them.
        if (name is None and hasattr(fileobj, "name") and
            isinstance(fileobj.name, basestring)):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes. Arguments left at None keep the class default.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attr, given in overrides:
        if given is not None:
            setattr(self, attr, given)

    # Without an explicit error handler the default depends on the
    # mode that was requested.
    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "utf-8"
    else:
        self.errors = "strict"

    use_pax = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax else {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))
                else:
                    self.members.append(member)

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Close a file that was opened here, then let the error
        # propagate unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001603
def _getposix(self):
    """Return True if the format is USTAR_FORMAT."""
    return self.format == USTAR_FORMAT
def _setposix(self, value):
    """Set the format to USTAR_FORMAT for a true value and to
       GNU_FORMAT otherwise. Emits a DeprecationWarning.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT
posix = property(_getposix, _setposix)
1615
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001616 #--------------------------------------------------------------------------
1617 # Below are the classmethods which act as alternate constructors to the
1618 # TarFile class. The open() method is the only one that is needed for
1619 # public use; it is the "super"-constructor and is able to select an
1620 # adequate "sub"-constructor for a particular compression using the mapping
1621 # from OPEN_METH.
1622 #
1623 # This concept allows one to subclass TarFile without losing the comfort of
1624 # the super-constructor. A sub-constructor is registered and made available
1625 # by adding it to the mapping in OPEN_METH.
1626
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or the
       mode is not understood, CompressionError for an unknown
       compression type and ReadError if no opener accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so that the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # The constructor sees the stream as an external file
            # object and leaves it open on failure, so close it here
            # before re-raising.
            stream.close()
            raise
        # The stream was created here, so the TarFile owns it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001699
Guido van Rossum75b64e62005-01-16 00:16:11 +00001700 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001701 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001702 """Open uncompressed tar archive name for reading or writing.
1703 """
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001704 if mode not in ("r", "a", "w"):
Georg Brandle4751e32006-05-18 06:11:19 +00001705 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001706 return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001707
Guido van Rossum75b64e62005-01-16 00:16:11 +00001708 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001709 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001710 """Open gzip compressed tar archive name for reading or writing.
1711 Appending is not allowed.
1712 """
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001713 if mode not in ("r", "w"):
Georg Brandle4751e32006-05-18 06:11:19 +00001714 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001715
1716 try:
1717 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001718 gzip.GzipFile
1719 except (ImportError, AttributeError):
Georg Brandle4751e32006-05-18 06:11:19 +00001720 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001721
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001722 if fileobj is None:
Brett Cannon6cef0762007-05-25 20:17:15 +00001723 fileobj = bltn_open(name, mode + "b")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001724
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001725 try:
Lars Gustäbela4b23812006-12-23 17:57:23 +00001726 t = cls.taropen(name, mode,
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001727 gzip.GzipFile(name, mode, compresslevel, fileobj),
1728 **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001729 except IOError:
Serhiy Storchaka7a278da2014-01-18 16:14:00 +02001730 if mode == 'r':
1731 raise ReadError("not a gzip file")
1732 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001733 t._extfileobj = False
1734 return t
1735
Guido van Rossum75b64e62005-01-16 00:16:11 +00001736 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001737 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001738 """Open bzip2 compressed tar archive name for reading or writing.
1739 Appending is not allowed.
1740 """
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001741 if mode not in ("r", "w"):
Georg Brandle4751e32006-05-18 06:11:19 +00001742 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001743
1744 try:
1745 import bz2
1746 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +00001747 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001748
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001749 if fileobj is not None:
Georg Brandl49c8f4c2006-05-15 19:30:35 +00001750 fileobj = _BZ2Proxy(fileobj, mode)
1751 else:
1752 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001753
1754 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001755 t = cls.taropen(name, mode, fileobj, **kwargs)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001756 except (IOError, EOFError):
Serhiy Storchaka7a278da2014-01-18 16:14:00 +02001757 if mode == 'r':
1758 raise ReadError("not a bzip2 file")
1759 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001760 t._extfileobj = False
1761 return t
1762
# Registry of the *open() classmethods: maps the compression type used
# in a mode string to the name of the method that handles it.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz": "gzopen",     # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
}
1769
1770 #--------------------------------------------------------------------------
1771 # The public methods which TarFile provides:
1772
1773 def close(self):
1774 """Close the TarFile. In write-mode, two finishing zero blocks are
1775 appended to the archive.
1776 """
1777 if self.closed:
1778 return
1779
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001780 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001781 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1782 self.offset += (BLOCKSIZE * 2)
1783 # fill up the end with zero-blocks
1784 # (like option -b20 for tar does)
1785 blocks, remainder = divmod(self.offset, RECORDSIZE)
1786 if remainder > 0:
1787 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1788
1789 if not self._extfileobj:
1790 self.fileobj.close()
1791 self.closed = True
1792
1793 def getmember(self, name):
1794 """Return a TarInfo object for member `name'. If `name' can not be
1795 found in the archive, KeyError is raised. If a member occurs more
Mark Dickinson3e4caeb2009-02-21 20:27:01 +00001796 than once in the archive, its last occurrence is assumed to be the
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001797 most up-to-date version.
1798 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001799 tarinfo = self._getmember(name)
1800 if tarinfo is None:
Georg Brandle4751e32006-05-18 06:11:19 +00001801 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001802 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001803
1804 def getmembers(self):
1805 """Return the members of the archive as a list of TarInfo objects. The
1806 list has the same order as the members in the archive.
1807 """
1808 self._check()
1809 if not self._loaded: # if we want to obtain a list of
1810 self._load() # all members, we first have to
1811 # scan the whole archive.
1812 return self.members
1813
1814 def getnames(self):
1815 """Return the members of the archive as a list of their names. It has
1816 the same order as the list returned by getmembers().
1817 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001818 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001819
1820 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1821 """Create a TarInfo object for either the file `name' or the file
1822 object `fileobj' (using os.fstat on its file descriptor). You can
1823 modify some of the TarInfo's attributes before you add it using
1824 addfile(). If given, `arcname' specifies an alternative name for the
1825 file in the archive.
1826 """
1827 self._check("aw")
1828
1829 # When fileobj is given, replace name by
1830 # fileobj's real name.
1831 if fileobj is not None:
1832 name = fileobj.name
1833
1834 # Building the name of the member in the archive.
1835 # Backward slashes are converted to forward slashes,
1836 # Absolute paths are turned to relative paths.
1837 if arcname is None:
1838 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001839 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001840 arcname = arcname.replace(os.sep, "/")
1841 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001842
1843 # Now, fill the TarInfo object with
1844 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001845 tarinfo = self.tarinfo()
1846 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001847
1848 # Use os.stat or os.lstat, depending on platform
1849 # and if symlinks shall be resolved.
1850 if fileobj is None:
1851 if hasattr(os, "lstat") and not self.dereference:
1852 statres = os.lstat(name)
1853 else:
1854 statres = os.stat(name)
1855 else:
1856 statres = os.fstat(fileobj.fileno())
1857 linkname = ""
1858
1859 stmd = statres.st_mode
1860 if stat.S_ISREG(stmd):
1861 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001862 if not self.dereference and statres.st_nlink > 1 and \
1863 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001864 # Is it a hardlink to an already
1865 # archived file?
1866 type = LNKTYPE
1867 linkname = self.inodes[inode]
1868 else:
1869 # The inode is added only if its valid.
1870 # For win32 it is always 0.
1871 type = REGTYPE
1872 if inode[0]:
1873 self.inodes[inode] = arcname
1874 elif stat.S_ISDIR(stmd):
1875 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001876 elif stat.S_ISFIFO(stmd):
1877 type = FIFOTYPE
1878 elif stat.S_ISLNK(stmd):
1879 type = SYMTYPE
1880 linkname = os.readlink(name)
1881 elif stat.S_ISCHR(stmd):
1882 type = CHRTYPE
1883 elif stat.S_ISBLK(stmd):
1884 type = BLKTYPE
1885 else:
1886 return None
1887
1888 # Fill the TarInfo object with all
1889 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001890 tarinfo.name = arcname
1891 tarinfo.mode = stmd
1892 tarinfo.uid = statres.st_uid
1893 tarinfo.gid = statres.st_gid
Lars Gustäbel2ee9c6f2010-06-03 09:56:22 +00001894 if type == REGTYPE:
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001895 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001896 else:
1897 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001898 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001899 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001900 tarinfo.linkname = linkname
1901 if pwd:
1902 try:
1903 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1904 except KeyError:
1905 pass
1906 if grp:
1907 try:
1908 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1909 except KeyError:
1910 pass
1911
1912 if type in (CHRTYPE, BLKTYPE):
1913 if hasattr(os, "major") and hasattr(os, "minor"):
1914 tarinfo.devmajor = os.major(statres.st_rdev)
1915 tarinfo.devminor = os.minor(statres.st_rdev)
1916 return tarinfo
1917
1918 def list(self, verbose=True):
1919 """Print a table of contents to sys.stdout. If `verbose' is False, only
1920 the names of the members are printed. If it is True, an `ls -l'-like
1921 output is produced.
1922 """
1923 self._check()
1924
1925 for tarinfo in self:
1926 if verbose:
1927 print filemode(tarinfo.mode),
1928 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1929 tarinfo.gname or tarinfo.gid),
1930 if tarinfo.ischr() or tarinfo.isblk():
1931 print "%10s" % ("%d,%d" \
1932 % (tarinfo.devmajor, tarinfo.devminor)),
1933 else:
1934 print "%10d" % tarinfo.size,
1935 print "%d-%02d-%02d %02d:%02d:%02d" \
1936 % time.localtime(tarinfo.mtime)[:6],
1937
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001938 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001939
1940 if verbose:
1941 if tarinfo.issym():
1942 print "->", tarinfo.linkname,
1943 if tarinfo.islnk():
1944 print "link to", tarinfo.linkname,
1945 print
1946
Lars Gustäbel21121e62009-09-12 10:28:15 +00001947 def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001948 """Add the file `name' to the archive. `name' may be any type of file
1949 (directory, fifo, symbolic link, etc.). If given, `arcname'
1950 specifies an alternative name for the file in the archive.
1951 Directories are added recursively by default. This can be avoided by
Lars Gustäbel104490e2007-06-18 11:42:11 +00001952 setting `recursive' to False. `exclude' is a function that should
Lars Gustäbel21121e62009-09-12 10:28:15 +00001953 return True for each filename to be excluded. `filter' is a function
1954 that expects a TarInfo object argument and returns the changed
1955 TarInfo object, if it returns None the TarInfo object will be
1956 excluded from the archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001957 """
1958 self._check("aw")
1959
1960 if arcname is None:
1961 arcname = name
1962
Lars Gustäbel104490e2007-06-18 11:42:11 +00001963 # Exclude pathnames.
Lars Gustäbel21121e62009-09-12 10:28:15 +00001964 if exclude is not None:
1965 import warnings
1966 warnings.warn("use the filter argument instead",
1967 DeprecationWarning, 2)
1968 if exclude(name):
1969 self._dbg(2, "tarfile: Excluded %r" % name)
1970 return
Lars Gustäbel104490e2007-06-18 11:42:11 +00001971
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001972 # Skip if somebody tries to archive the archive...
Lars Gustäbela4b23812006-12-23 17:57:23 +00001973 if self.name is not None and os.path.abspath(name) == self.name:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001974 self._dbg(2, "tarfile: Skipped %r" % name)
1975 return
1976
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001977 self._dbg(1, name)
1978
1979 # Create a TarInfo object from the file.
1980 tarinfo = self.gettarinfo(name, arcname)
1981
1982 if tarinfo is None:
1983 self._dbg(1, "tarfile: Unsupported type %r" % name)
1984 return
1985
Lars Gustäbel21121e62009-09-12 10:28:15 +00001986 # Change or exclude the TarInfo object.
1987 if filter is not None:
1988 tarinfo = filter(tarinfo)
1989 if tarinfo is None:
1990 self._dbg(2, "tarfile: Excluded %r" % name)
1991 return
1992
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001993 # Append the tar header and data to the archive.
1994 if tarinfo.isreg():
Andrew Svetlovac26a2e2012-11-29 14:22:26 +02001995 with bltn_open(name, "rb") as f:
1996 self.addfile(tarinfo, f)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001997
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001998 elif tarinfo.isdir():
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001999 self.addfile(tarinfo)
2000 if recursive:
2001 for f in os.listdir(name):
Lars Gustäbel21121e62009-09-12 10:28:15 +00002002 self.add(os.path.join(name, f), os.path.join(arcname, f),
2003 recursive, exclude, filter)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002004
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00002005 else:
2006 self.addfile(tarinfo)
2007
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002008 def addfile(self, tarinfo, fileobj=None):
2009 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2010 given, tarinfo.size bytes are read from it and added to the archive.
2011 You can create TarInfo objects using gettarinfo().
2012 On Windows platforms, `fileobj' should always be opened with mode
2013 'rb' to avoid irritation about the file size.
2014 """
2015 self._check("aw")
2016
Georg Brandl3354f282006-10-29 09:16:12 +00002017 tarinfo = copy.copy(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002018
Lars Gustäbela0fcb932007-05-27 19:49:30 +00002019 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
Georg Brandl3354f282006-10-29 09:16:12 +00002020 self.fileobj.write(buf)
2021 self.offset += len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002022
2023 # If there's data to follow, append it.
2024 if fileobj is not None:
2025 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2026 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2027 if remainder > 0:
2028 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2029 blocks += 1
2030 self.offset += blocks * BLOCKSIZE
2031
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002032 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002033
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002034 def extractall(self, path=".", members=None):
2035 """Extract all members from the archive to the current working
2036 directory and set owner, modification time and permissions on
2037 directories afterwards. `path' specifies a different directory
2038 to extract to. `members' is optional and must be a subset of the
2039 list returned by getmembers().
2040 """
2041 directories = []
2042
2043 if members is None:
2044 members = self
2045
2046 for tarinfo in members:
2047 if tarinfo.isdir():
Lars Gustäbel0192e432008-02-05 11:51:40 +00002048 # Extract directories with a safe mode.
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002049 directories.append(tarinfo)
Lars Gustäbel0192e432008-02-05 11:51:40 +00002050 tarinfo = copy.copy(tarinfo)
2051 tarinfo.mode = 0700
2052 self.extract(tarinfo, path)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002053
2054 # Reverse sort directories.
Brett Cannon132fc542008-08-04 21:23:07 +00002055 directories.sort(key=operator.attrgetter('name'))
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002056 directories.reverse()
2057
2058 # Set correct owner, mtime and filemode on directories.
2059 for tarinfo in directories:
Lars Gustäbel2ee1c762008-01-04 14:00:33 +00002060 dirpath = os.path.join(path, tarinfo.name)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002061 try:
Lars Gustäbel2ee1c762008-01-04 14:00:33 +00002062 self.chown(tarinfo, dirpath)
2063 self.utime(tarinfo, dirpath)
2064 self.chmod(tarinfo, dirpath)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002065 except ExtractError, e:
2066 if self.errorlevel > 1:
2067 raise
2068 else:
2069 self._dbg(1, "tarfile: %s" % e)
2070
def extract(self, member, path=""):
    """Extract one member from the archive, using its full name, below
       the directory `path' (the current working directory by default).

       `member' may be a filename or a TarInfo object. Whether errors
       are raised or only reported through _dbg() depends on
       self.errorlevel: EnvironmentError is re-raised for levels above
       0, ExtractError for levels above 1.
    """
    self._check("r")

    if isinstance(member, basestring):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # makelink() needs to know where a hard link's target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2103
def extractfile(self, member):
    """Return a read-only file object for a member of the archive.

       `member' may be a filename or a TarInfo object. For a regular
       file, or a member of an unknown type (which is treated like a
       regular file), a file-like object is returned. For a hard or
       symbolic link the file object of the link's target is returned;
       this raises StreamError when the archive is read from a _Stream.
       For any other member None is returned.
       The file-like object provides the following methods:
       read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, basestring):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Regular files and members of unknown type carry data.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices etc. have no data associated with them.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Resolving a link means looking up another member, which is not
    # possible on a stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2141
2142 def _extract_member(self, tarinfo, targetpath):
2143 """Extract the TarInfo object tarinfo to a physical
2144 file called targetpath.
2145 """
2146 # Fetch the TarInfo object for the given name
2147 # and build the destination pathname, replacing
2148 # forward slashes to platform specific separators.
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002149 targetpath = targetpath.rstrip("/")
2150 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002151
2152 # Create all upper directories.
2153 upperdirs = os.path.dirname(targetpath)
2154 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel0192e432008-02-05 11:51:40 +00002155 # Create directories that are not part of the archive with
2156 # default permissions.
Lars Gustäbeld2e22902007-01-23 11:17:33 +00002157 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002158
2159 if tarinfo.islnk() or tarinfo.issym():
2160 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2161 else:
2162 self._dbg(1, tarinfo.name)
2163
2164 if tarinfo.isreg():
2165 self.makefile(tarinfo, targetpath)
2166 elif tarinfo.isdir():
2167 self.makedir(tarinfo, targetpath)
2168 elif tarinfo.isfifo():
2169 self.makefifo(tarinfo, targetpath)
2170 elif tarinfo.ischr() or tarinfo.isblk():
2171 self.makedev(tarinfo, targetpath)
2172 elif tarinfo.islnk() or tarinfo.issym():
2173 self.makelink(tarinfo, targetpath)
2174 elif tarinfo.type not in SUPPORTED_TYPES:
2175 self.makeunknown(tarinfo, targetpath)
2176 else:
2177 self.makefile(tarinfo, targetpath)
2178
2179 self.chown(tarinfo, targetpath)
2180 if not tarinfo.issym():
2181 self.chmod(tarinfo, targetpath)
2182 self.utime(tarinfo, targetpath)
2183
2184 #--------------------------------------------------------------------------
2185 # Below are the different file methods. They are called via
2186 # _extract_member() when extract() is called. They can be replaced in a
2187 # subclass to implement other functionality.
2188
2189 def makedir(self, tarinfo, targetpath):
2190 """Make a directory called targetpath.
2191 """
2192 try:
Lars Gustäbel0192e432008-02-05 11:51:40 +00002193 # Use a safe mode for the directory, the real mode is set
2194 # later in _extract_member().
2195 os.mkdir(targetpath, 0700)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002196 except EnvironmentError, e:
2197 if e.errno != errno.EEXIST:
2198 raise
2199
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to the file `targetpath'.
    """
    member_data = self.extractfile(tarinfo)
    try:
        with bltn_open(targetpath, "wb") as outfile:
            copyfileobj(member_data, outfile)
    finally:
        # The member's file object is released even if writing fails.
        member_data.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002209
2210 def makeunknown(self, tarinfo, targetpath):
2211 """Make a file from a TarInfo object with an unknown type
2212 at targetpath.
2213 """
2214 self.makefile(tarinfo, targetpath)
2215 self._dbg(1, "tarfile: Unknown file type %r, " \
2216 "extracted as regular file." % tarinfo.type)
2217
2218 def makefifo(self, tarinfo, targetpath):
2219 """Make a fifo called targetpath.
2220 """
2221 if hasattr(os, "mkfifo"):
2222 os.mkfifo(targetpath)
2223 else:
Georg Brandle4751e32006-05-18 06:11:19 +00002224 raise ExtractError("fifo not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002225
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'.
       ExtractError is raised on platforms that lack os.mknod or
       os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        mode = tarinfo.mode | stat.S_IFBLK
    else:
        mode = tarinfo.mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2240
2241 def makelink(self, tarinfo, targetpath):
2242 """Make a (symbolic) link called targetpath. If it cannot be created
2243 (platform limitation), we try to make a copy of the referenced file
2244 instead of a link.
2245 """
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002246 if hasattr(os, "symlink") and hasattr(os, "link"):
2247 # For systems that support symbolic and hard links.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002248 if tarinfo.issym():
Senthil Kumaran4dd89ce2011-05-17 10:12:18 +08002249 if os.path.lexists(targetpath):
Senthil Kumaran011525e2011-04-28 15:30:31 +08002250 os.unlink(targetpath)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002251 os.symlink(tarinfo.linkname, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002252 else:
Neal Norwitza4f651a2004-07-20 22:07:44 +00002253 # See extract().
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002254 if os.path.exists(tarinfo._link_target):
Senthil Kumaran4dd89ce2011-05-17 10:12:18 +08002255 if os.path.lexists(targetpath):
2256 os.unlink(targetpath)
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002257 os.link(tarinfo._link_target, targetpath)
2258 else:
2259 self._extract_member(self._find_link_target(tarinfo), targetpath)
2260 else:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002261 try:
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002262 self._extract_member(self._find_link_target(tarinfo), targetpath)
2263 except KeyError:
2264 raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002265
2266 def chown(self, tarinfo, targetpath):
2267 """Set owner of targetpath according to tarinfo.
2268 """
2269 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2270 # We have to be root to do so.
2271 try:
2272 g = grp.getgrnam(tarinfo.gname)[2]
2273 except KeyError:
Lars Gustäbel8babfdf2011-09-05 17:04:18 +02002274 g = tarinfo.gid
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002275 try:
2276 u = pwd.getpwnam(tarinfo.uname)[2]
2277 except KeyError:
Lars Gustäbel8babfdf2011-09-05 17:04:18 +02002278 u = tarinfo.uid
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002279 try:
2280 if tarinfo.issym() and hasattr(os, "lchown"):
2281 os.lchown(targetpath, u, g)
2282 else:
Andrew MacIntyre7970d202003-02-19 12:51:34 +00002283 if sys.platform != "os2emx":
2284 os.chown(targetpath, u, g)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002285 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002286 raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002287
2288 def chmod(self, tarinfo, targetpath):
2289 """Set file permissions of targetpath according to tarinfo.
2290 """
Jack Jansen834eff62003-03-07 12:47:06 +00002291 if hasattr(os, 'chmod'):
2292 try:
2293 os.chmod(targetpath, tarinfo.mode)
2294 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002295 raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002296
2297 def utime(self, tarinfo, targetpath):
2298 """Set modification time of targetpath according to tarinfo.
2299 """
Jack Jansen834eff62003-03-07 12:47:06 +00002300 if not hasattr(os, 'utime'):
Tim Petersf9347782003-03-07 15:36:41 +00002301 return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002302 try:
2303 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2304 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002305 raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002306
2307 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002308 def next(self):
2309 """Return the next member of the archive as a TarInfo object, when
2310 TarFile is opened for reading. Return None if there is no more
2311 available.
2312 """
2313 self._check("ra")
2314 if self.firstmember is not None:
2315 m = self.firstmember
2316 self.firstmember = None
2317 return m
2318
2319 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002320 self.fileobj.seek(self.offset)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002321 tarinfo = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002322 while True:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002323 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002324 tarinfo = self.tarinfo.fromtarfile(self)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002325 except EOFHeaderError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002326 if self.ignore_zeros:
Georg Brandlebbeed72006-12-19 22:06:46 +00002327 self._dbg(2, "0x%X: %s" % (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002328 self.offset += BLOCKSIZE
2329 continue
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002330 except InvalidHeaderError, e:
2331 if self.ignore_zeros:
2332 self._dbg(2, "0x%X: %s" % (self.offset, e))
2333 self.offset += BLOCKSIZE
2334 continue
2335 elif self.offset == 0:
2336 raise ReadError(str(e))
2337 except EmptyHeaderError:
2338 if self.offset == 0:
2339 raise ReadError("empty file")
2340 except TruncatedHeaderError, e:
2341 if self.offset == 0:
2342 raise ReadError(str(e))
2343 except SubsequentHeaderError, e:
2344 raise ReadError(str(e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002345 break
2346
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002347 if tarinfo is not None:
2348 self.members.append(tarinfo)
2349 else:
2350 self._loaded = True
2351
Georg Brandl38c6a222006-05-10 16:26:03 +00002352 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002353
2354 #--------------------------------------------------------------------------
2355 # Little helper methods:
2356
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last archive member whose name equals `name`, or
       None if no member matches. The member list is scanned from its
       end towards its beginning. If `tarinfo` is given, only the
       members stored in front of it are considered. If `normalize`
       is true, both `name` and the member names are passed through
       os.path.normpath() before they are compared.
    """
    # Going through getmembers() makes sure the complete member list
    # is searched.
    candidates = self.getmembers()

    # Cut the search list off right before the starting point.
    if tarinfo is not None:
        cutoff = candidates.index(tarinfo)
        candidates = candidates[:cutoff]

    if normalize:
        name = os.path.normpath(name)

    for candidate in candidates[::-1]:
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)

        if name == candidate_name:
            return candidate

    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002379
def _load(self):
    """Walk through the whole archive by calling next() until it
       reports that there are no more members, then mark the archive
       as completely loaded.
    """
    # next() returns None once there is nothing left to read.
    while self.next() is not None:
        pass
    self._loaded = True
2389
def _check(self, mode=None):
    """Raise IOError if the TarFile has already been closed or, when
       `mode` is given, if the TarFile's own mode is not contained in
       `mode`.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)

    # Without a mode restriction there is nothing else to verify.
    if mode is None:
        return

    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002398
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       `tarinfo` refers to. Raise KeyError if no such member can be
       found.
    """
    if not tarinfo.issym():
        # A hard link is just a reference to a file that was archived
        # earlier, so only the members in front of the link are
        # searched.
        linkname = tarinfo.linkname
        limit = tarinfo
    else:
        # A symlink target is looked up in the entire archive. The
        # target is joined onto the directory of the link member;
        # empty components are left out.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([part for part in parts if part])
        limit = None

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2417
def __iter__(self):
    """Return an iterator over the archive members: a plain list
       iterator if the archive is already marked as loaded, a TarIter
       object otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2425
def _dbg(self, level, msg):
    """Print `msg` to sys.stderr unless `level` is higher than the
       TarFile's debug setting.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002431
def __enter__(self):
    """Enter a with-block: run _check() first, then hand back the
       TarFile itself as the context object.
    """
    self._check()
    return self
2435
def __exit__(self, type, value, traceback):
    """Leave a with-block. Without a pending exception the TarFile
       is closed via close(). With a pending exception only the
       underlying file object is closed (and only if it is not an
       external one) and the TarFile is flagged as closed.
    """
    if type is None:
        self.close()
        return

    # An exception is on its way out. close() must be avoided here
    # because it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002445# class TarFile
2446
2447class TarIter:
2448 """Iterator Class.
2449
2450 for tarinfo in TarFile(...):
2451 suite...
2452 """
2453
2454 def __init__(self, tarfile):
2455 """Construct a TarIter object.
2456 """
2457 self.tarfile = tarfile
Martin v. Löwis637431b2005-03-03 23:12:42 +00002458 self.index = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002459 def __iter__(self):
2460 """Return iterator object.
2461 """
2462 return self
2463 def next(self):
2464 """Return the next item using TarFile's next() method.
2465 When all members have been read, set TarFile as _loaded.
2466 """
Martin v. Löwis637431b2005-03-03 23:12:42 +00002467 # Fix for SF #1100429: Under rare circumstances it can
2468 # happen that getmembers() is called during iteration,
2469 # which will cause TarIter to stop prematurely.
Serhiy Storchakace34ba62013-05-09 14:22:05 +03002470
2471 if self.index == 0 and self.tarfile.firstmember is not None:
2472 tarinfo = self.tarfile.next()
2473 elif self.index < len(self.tarfile.members):
2474 tarinfo = self.tarfile.members[self.index]
2475 elif not self.tarfile._loaded:
Martin v. Löwis637431b2005-03-03 23:12:42 +00002476 tarinfo = self.tarfile.next()
2477 if not tarinfo:
2478 self.tarfile._loaded = True
2479 raise StopIteration
2480 else:
Serhiy Storchakace34ba62013-05-09 14:22:05 +03002481 raise StopIteration
Martin v. Löwis637431b2005-03-03 23:12:42 +00002482 self.index += 1
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002483 return tarinfo
2484
2485# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size` bytes that
       starts at `offset`.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # The range is half-open: its start belongs to the section,
        # its end (start + size) does not.
        start = self.offset
        return start <= offset < start + self.size
2494
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       the offset and size of a _section it stores `realpos`.
    """
    def __init__(self, offset, size, realpos):
        # _section is initialized by an explicit base class call.
        _section.__init__(self, offset, size)
        self.realpos = realpos
2501
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section and only serves to tell holes from data sections.
    """
2506
class _ringbuffer(list):
    """List subclass with a search cursor. find() starts looking at
       the item that matched last, which is faster than scanning a
       regular list from its beginning when successive lookups hit
       the same or neighbouring items.
    """
    def __init__(self):
        # Index of the item at which the next find() starts.
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching circularly from the
           cursor, for which `offset in item` is true, and move the
           cursor to it. Return None if no item matches or if the
           buffer is empty.
        """
        count = len(self)
        if count == 0:
            # Nothing to search. Indexing an empty list would raise
            # IndexError instead of reporting "not found".
            return None

        # Bring the cursor into the valid range, so that a cursor
        # which does not fit the current length cannot raise
        # IndexError or keep the loop below from terminating.
        start = self.idx % count
        idx = start
        while True:
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
            idx += 1
            if idx == count:
                idx = 0
            if idx == start:
                # Every item has been looked at once: End of File.
                return None
2527
2528#---------------------------------------------
2529# zipfile compatible TarFile class
2530#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file` in `mode`, either as a plain tar archive
           (TAR_PLAIN) or as a gzip compressed one (TAR_GZIPPED).
           Any other `compression` value raises ValueError.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)

        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            # Attach the attribute names that zipfile uses for member
            # information to every member that was read.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Let the TarFile list its contents.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called `name`.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called `name`.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename` to the archive, optionally under
           the name `arcname`. `compress_type` is accepted but not
           used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member to the archive whose contents are `bytes`
           and whose name and modification time are taken from
           `zinfo.filename` and `zinfo.date_time`.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        member = TarInfo(zinfo.filename)
        member.size = len(bytes)
        member.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(member, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2581
2582#--------------------
2583# exported functions
2584#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that can be
       opened (and closed again) without a TarError, else return
       False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2595
# Keep the object that the name `open` referred to so far reachable as
# bltn_open (presumably the builtin open() -- confirm that nothing
# earlier in the module rebinds it), because the next statement
# replaces the module-level name.
bltn_open = open
# From here on, the module-level open() is TarFile.open.
open = TarFile.open