blob: e73ac90ede0adc919be8e73f334025e314e90d97 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001# -*- coding: iso-8859-1 -*-
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Senthil Kumaran4af1c6a2011-07-28 22:30:27 +080032__version__ = "$Revision: 85213 $"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000033# $Source$
34
Lars Gustäbelc64e4022007-03-13 10:47:19 +000035version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000036__author__ = "Lars Gustäbel (lars@gustaebel.de)"
37__date__ = "$Date$"
38__cvsid__ = "$Id$"
39__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
40
41#---------
42# Imports
43#---------
44import sys
45import os
46import shutil
47import stat
48import errno
49import time
50import struct
Georg Brandl3354f282006-10-29 09:16:12 +000051import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000052import re
Brett Cannon132fc542008-08-04 21:23:07 +000053import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
55try:
56 import grp, pwd
57except ImportError:
58 grp = pwd = None
59
60# from tarfile import *
61__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
62
63#---------------------------------------------------------
64# tar constants
65#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +000066NUL = "\0" # the null character
67BLOCKSIZE = 512 # length of processing blocks
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000068RECORDSIZE = BLOCKSIZE * 20 # length of records
Lars Gustäbelc64e4022007-03-13 10:47:19 +000069GNU_MAGIC = "ustar \0" # magic gnu tar string
70POSIX_MAGIC = "ustar\x0000" # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000071
Lars Gustäbelc64e4022007-03-13 10:47:19 +000072LENGTH_NAME = 100 # maximum length of a filename
73LENGTH_LINK = 100 # maximum length of a linkname
74LENGTH_PREFIX = 155 # maximum length of the prefix field
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000075
Lars Gustäbelc64e4022007-03-13 10:47:19 +000076REGTYPE = "0" # regular file
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000077AREGTYPE = "\0" # regular file
Lars Gustäbelc64e4022007-03-13 10:47:19 +000078LNKTYPE = "1" # link (inside tarfile)
79SYMTYPE = "2" # symbolic link
80CHRTYPE = "3" # character special device
81BLKTYPE = "4" # block special device
82DIRTYPE = "5" # directory
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000083FIFOTYPE = "6" # fifo special device
84CONTTYPE = "7" # contiguous file
85
Lars Gustäbelc64e4022007-03-13 10:47:19 +000086GNUTYPE_LONGNAME = "L" # GNU tar longname
87GNUTYPE_LONGLINK = "K" # GNU tar longlink
88GNUTYPE_SPARSE = "S" # GNU tar sparse file
89
90XHDTYPE = "x" # POSIX.1-2001 extended header
91XGLTYPE = "g" # POSIX.1-2001 global header
92SOLARIS_XHDTYPE = "X" # Solaris extended header
93
94USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
95GNU_FORMAT = 1 # GNU tar format
96PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
97DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000098
99#---------------------------------------------------------
100# tarfile constants
101#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000102# File types that tarfile supports:
103SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
104 SYMTYPE, DIRTYPE, FIFOTYPE,
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000105 CONTTYPE, CHRTYPE, BLKTYPE,
106 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
107 GNUTYPE_SPARSE)
108
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000109# File types that will be treated as a regular file.
110REGULAR_TYPES = (REGTYPE, AREGTYPE,
111 CONTTYPE, GNUTYPE_SPARSE)
112
113# File types that are part of the GNU tar format.
114GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
115 GNUTYPE_SPARSE)
116
117# Fields from a pax header that override a TarInfo attribute.
118PAX_FIELDS = ("path", "linkpath", "size", "mtime",
119 "uid", "gid", "uname", "gname")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000120
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000121# Fields in a pax header that are numbers, all other fields
122# are treated as strings.
123PAX_NUMBER_FIELDS = {
124 "atime": float,
125 "ctime": float,
126 "mtime": float,
127 "uid": int,
128 "gid": int,
129 "size": int
130}
131
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000132#---------------------------------------------------------
133# Bits used in the mode field, values in octal.
134#---------------------------------------------------------
135S_IFLNK = 0120000 # symbolic link
136S_IFREG = 0100000 # regular file
137S_IFBLK = 0060000 # block device
138S_IFDIR = 0040000 # directory
139S_IFCHR = 0020000 # character device
140S_IFIFO = 0010000 # fifo
141
142TSUID = 04000 # set UID on execution
143TSGID = 02000 # set GID on execution
144TSVTX = 01000 # reserved
145
146TUREAD = 0400 # read by owner
147TUWRITE = 0200 # write by owner
148TUEXEC = 0100 # execute/search by owner
149TGREAD = 0040 # read by group
150TGWRITE = 0020 # write by group
151TGEXEC = 0010 # execute/search by group
152TOREAD = 0004 # read by other
153TOWRITE = 0002 # write by other
154TOEXEC = 0001 # execute/search by other
155
156#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000157# initialization
158#---------------------------------------------------------
159ENCODING = sys.getfilesystemencoding()
160if ENCODING is None:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000161 ENCODING = sys.getdefaultencoding()
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000162
163#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164# Some useful functions
165#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000166
Georg Brandl38c6a222006-05-10 16:26:03 +0000167def stn(s, length):
168 """Convert a python string to a null-terminated string buffer.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000169 """
Georg Brandla32e0a02006-10-24 16:54:16 +0000170 return s[:length] + (length - len(s)) * NUL
Georg Brandl38c6a222006-05-10 16:26:03 +0000171
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000172def nts(s):
173 """Convert a null-terminated string field to a python string.
174 """
175 # Use the string up to the first null char.
176 p = s.find("\0")
177 if p == -1:
178 return s
179 return s[:p]
180
Georg Brandl38c6a222006-05-10 16:26:03 +0000181def nti(s):
182 """Convert a number field to a python number.
183 """
184 # There are two possible encodings for a number field, see
185 # itn() below.
186 if s[0] != chr(0200):
Georg Brandlded1c4d2006-12-20 11:55:16 +0000187 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000188 n = int(nts(s) or "0", 8)
Georg Brandlded1c4d2006-12-20 11:55:16 +0000189 except ValueError:
Lars Gustäbeldd866d52009-11-22 18:30:53 +0000190 raise InvalidHeaderError("invalid header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000191 else:
192 n = 0L
193 for i in xrange(len(s) - 1):
194 n <<= 8
195 n += ord(s[i + 1])
196 return n
197
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000198def itn(n, digits=8, format=DEFAULT_FORMAT):
Georg Brandl38c6a222006-05-10 16:26:03 +0000199 """Convert a python number to a number field.
200 """
201 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
202 # octal digits followed by a null-byte, this allows values up to
203 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
204 # that if necessary. A leading 0200 byte indicates this particular
205 # encoding, the following digits-1 bytes are a big-endian
206 # representation. This allows values up to (256**(digits-1))-1.
207 if 0 <= n < 8 ** (digits - 1):
208 s = "%0*o" % (digits - 1, n) + NUL
209 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000210 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
Georg Brandle4751e32006-05-18 06:11:19 +0000211 raise ValueError("overflow in number field")
Georg Brandl38c6a222006-05-10 16:26:03 +0000212
213 if n < 0:
214 # XXX We mimic GNU tar's behaviour with negative numbers,
215 # this could raise OverflowError.
216 n = struct.unpack("L", struct.pack("l", n))[0]
217
218 s = ""
219 for i in xrange(digits - 1):
220 s = chr(n & 0377) + s
221 n >>= 8
222 s = chr(0200) + s
223 return s
224
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000225def uts(s, encoding, errors):
226 """Convert a unicode object to a string.
227 """
228 if errors == "utf-8":
229 # An extra error handler similar to the -o invalid=UTF-8 option
230 # in POSIX.1-2001. Replace untranslatable characters with their
231 # UTF-8 representation.
232 try:
233 return s.encode(encoding, "strict")
234 except UnicodeEncodeError:
235 x = []
236 for c in s:
237 try:
238 x.append(c.encode(encoding, "strict"))
239 except UnicodeEncodeError:
240 x.append(c.encode("utf8"))
241 return "".join(x)
242 else:
243 return s.encode(encoding, errors)
244
Georg Brandl38c6a222006-05-10 16:26:03 +0000245def calc_chksums(buf):
246 """Calculate the checksum for a member's header by summing up all
247 characters except for the chksum field which is treated as if
248 it was filled with spaces. According to the GNU tar sources,
249 some tars (Sun and NeXT) calculate chksum with signed char,
250 which will be different if there are chars in the buffer with
251 the high bit set. So we calculate two checksums, unsigned and
252 signed.
253 """
254 unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
255 signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
256 return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000257
258def copyfileobj(src, dst, length=None):
259 """Copy length bytes from fileobj src to fileobj dst.
260 If length is None, copy the entire content.
261 """
262 if length == 0:
263 return
264 if length is None:
265 shutil.copyfileobj(src, dst)
266 return
267
268 BUFSIZE = 16 * 1024
269 blocks, remainder = divmod(length, BUFSIZE)
270 for b in xrange(blocks):
271 buf = src.read(BUFSIZE)
272 if len(buf) < BUFSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000273 raise IOError("end of file reached")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000274 dst.write(buf)
275
276 if remainder != 0:
277 buf = src.read(remainder)
278 if len(buf) < remainder:
Georg Brandle4751e32006-05-18 06:11:19 +0000279 raise IOError("end of file reached")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000280 dst.write(buf)
281 return
282
283filemode_table = (
Andrew M. Kuchling8bc462f2004-10-20 11:48:42 +0000284 ((S_IFLNK, "l"),
285 (S_IFREG, "-"),
286 (S_IFBLK, "b"),
287 (S_IFDIR, "d"),
288 (S_IFCHR, "c"),
289 (S_IFIFO, "p")),
290
291 ((TUREAD, "r"),),
292 ((TUWRITE, "w"),),
293 ((TUEXEC|TSUID, "s"),
294 (TSUID, "S"),
295 (TUEXEC, "x")),
296
297 ((TGREAD, "r"),),
298 ((TGWRITE, "w"),),
299 ((TGEXEC|TSGID, "s"),
300 (TSGID, "S"),
301 (TGEXEC, "x")),
302
303 ((TOREAD, "r"),),
304 ((TOWRITE, "w"),),
305 ((TOEXEC|TSVTX, "t"),
306 (TSVTX, "T"),
307 (TOEXEC, "x"))
308)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000309
310def filemode(mode):
311 """Convert a file's mode to a string of the form
312 -rwxrwxrwx.
313 Used by TarFile.list()
314 """
Andrew M. Kuchling8bc462f2004-10-20 11:48:42 +0000315 perm = []
316 for table in filemode_table:
317 for bit, char in table:
318 if mode & bit == bit:
319 perm.append(char)
320 break
321 else:
322 perm.append("-")
323 return "".join(perm)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000324
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325class TarError(Exception):
326 """Base exception."""
327 pass
328class ExtractError(TarError):
329 """General exception for extract errors."""
330 pass
331class ReadError(TarError):
Ezio Melottif5469cf2013-08-17 15:43:51 +0300332 """Exception for unreadable tar archives."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000333 pass
334class CompressionError(TarError):
335 """Exception for unavailable compression methods."""
336 pass
337class StreamError(TarError):
338 """Exception for unsupported operations on stream-like TarFiles."""
339 pass
Georg Brandlebbeed72006-12-19 22:06:46 +0000340class HeaderError(TarError):
Lars Gustäbeldd866d52009-11-22 18:30:53 +0000341 """Base exception for header errors."""
342 pass
343class EmptyHeaderError(HeaderError):
344 """Exception for empty headers."""
345 pass
346class TruncatedHeaderError(HeaderError):
347 """Exception for truncated headers."""
348 pass
349class EOFHeaderError(HeaderError):
350 """Exception for end of file headers."""
351 pass
352class InvalidHeaderError(HeaderError):
Georg Brandlebbeed72006-12-19 22:06:46 +0000353 """Exception for invalid headers."""
354 pass
Lars Gustäbeldd866d52009-11-22 18:30:53 +0000355class SubsequentHeaderError(HeaderError):
356 """Exception for missing and invalid extended headers."""
357 pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358
359#---------------------------
360# internal stream interface
361#---------------------------
362class _LowLevelFile:
363 """Low-level file object. Supports reading and writing.
364 It is used instead of a regular file object for streaming
365 access.
366 """
367
368 def __init__(self, name, mode):
369 mode = {
370 "r": os.O_RDONLY,
371 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
372 }[mode]
373 if hasattr(os, "O_BINARY"):
374 mode |= os.O_BINARY
Lars Gustäbel5c4c4612010-04-29 15:23:38 +0000375 self.fd = os.open(name, mode, 0666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376
377 def close(self):
378 os.close(self.fd)
379
380 def read(self, size):
381 return os.read(self.fd, size)
382
383 def write(self, s):
384 os.write(self.fd, s)
385
386class _Stream:
387 """Class that serves as an adapter between TarFile and
388 a stream-like object. The stream-like object only
389 needs to have a read() or write() method and is accessed
390 blockwise. Use of gzip or bzip2 compression is possible.
391 A stream-like object could be for example: sys.stdin,
392 sys.stdout, a socket, a tape device etc.
393
394 _Stream is intended to be used only internally.
395 """
396
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000397 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000398 """Construct a _Stream object.
399 """
400 self._extfileobj = True
401 if fileobj is None:
402 fileobj = _LowLevelFile(name, mode)
403 self._extfileobj = False
404
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000405 if comptype == '*':
406 # Enable transparent compression detection for the
407 # stream interface
408 fileobj = _StreamProxy(fileobj)
409 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 self.name = name or ""
412 self.mode = mode
413 self.comptype = comptype
414 self.fileobj = fileobj
415 self.bufsize = bufsize
416 self.buf = ""
417 self.pos = 0L
418 self.closed = False
419
420 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000421 try:
422 import zlib
423 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000424 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000425 self.zlib = zlib
Gregory P. Smith88440962008-03-25 06:12:45 +0000426 self.crc = zlib.crc32("") & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000427 if mode == "r":
428 self._init_read_gz()
429 else:
430 self._init_write_gz()
431
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000432 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000433 try:
434 import bz2
435 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000436 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000437 if mode == "r":
438 self.dbuf = ""
439 self.cmp = bz2.BZ2Decompressor()
440 else:
441 self.cmp = bz2.BZ2Compressor()
442
443 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000444 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000445 self.close()
446
447 def _init_write_gz(self):
448 """Initialize for writing with gzip compression.
449 """
450 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
451 -self.zlib.MAX_WBITS,
452 self.zlib.DEF_MEM_LEVEL,
453 0)
454 timestamp = struct.pack("<L", long(time.time()))
455 self.__write("\037\213\010\010%s\002\377" % timestamp)
Lars Gustäbel7d4d0742011-12-21 19:27:50 +0100456 if type(self.name) is unicode:
457 self.name = self.name.encode("iso-8859-1", "replace")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000458 if self.name.endswith(".gz"):
459 self.name = self.name[:-3]
460 self.__write(self.name + NUL)
461
462 def write(self, s):
463 """Write string s to the stream.
464 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000465 if self.comptype == "gz":
Gregory P. Smith88440962008-03-25 06:12:45 +0000466 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000467 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000468 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000469 s = self.cmp.compress(s)
470 self.__write(s)
471
472 def __write(self, s):
473 """Write string s to the stream if a whole new block
474 is ready to be written.
475 """
476 self.buf += s
477 while len(self.buf) > self.bufsize:
478 self.fileobj.write(self.buf[:self.bufsize])
479 self.buf = self.buf[self.bufsize:]
480
481 def close(self):
482 """Close the _Stream object. No operation should be
483 done on it afterwards.
484 """
485 if self.closed:
486 return
487
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000488 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000489 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000490
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000491 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000492 self.fileobj.write(self.buf)
493 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000494 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000495 # The native zlib crc is an unsigned 32-bit integer, but
496 # the Python wrapper implicitly casts that to a signed C
497 # long. So, on a 32-bit box self.crc may "look negative",
498 # while the same crc on a 64-bit box may "look positive".
499 # To avoid irksome warnings from the `struct` module, force
500 # it to look positive on all boxes.
501 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000502 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503
504 if not self._extfileobj:
505 self.fileobj.close()
506
507 self.closed = True
508
509 def _init_read_gz(self):
510 """Initialize for reading a gzip compressed fileobj.
511 """
512 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
513 self.dbuf = ""
514
515 # taken from gzip.GzipFile with some alterations
516 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000517 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000518 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000519 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000520
521 flag = ord(self.__read(1))
522 self.__read(6)
523
524 if flag & 4:
525 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
526 self.read(xlen)
527 if flag & 8:
528 while True:
529 s = self.__read(1)
530 if not s or s == NUL:
531 break
532 if flag & 16:
533 while True:
534 s = self.__read(1)
535 if not s or s == NUL:
536 break
537 if flag & 2:
538 self.__read(2)
539
540 def tell(self):
541 """Return the stream's file pointer position.
542 """
543 return self.pos
544
545 def seek(self, pos=0):
546 """Set the stream's file pointer to pos. Negative seeking
547 is forbidden.
548 """
549 if pos - self.pos >= 0:
550 blocks, remainder = divmod(pos - self.pos, self.bufsize)
551 for i in xrange(blocks):
552 self.read(self.bufsize)
553 self.read(remainder)
554 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000555 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000556 return self.pos
557
558 def read(self, size=None):
559 """Return the next size number of bytes from the stream.
560 If size is not defined, return all bytes of the stream
561 up to EOF.
562 """
563 if size is None:
564 t = []
565 while True:
566 buf = self._read(self.bufsize)
567 if not buf:
568 break
569 t.append(buf)
570 buf = "".join(t)
571 else:
572 buf = self._read(size)
573 self.pos += len(buf)
574 return buf
575
576 def _read(self, size):
577 """Return size bytes from the stream.
578 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000579 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000580 return self.__read(size)
581
582 c = len(self.dbuf)
583 t = [self.dbuf]
584 while c < size:
585 buf = self.__read(self.bufsize)
586 if not buf:
587 break
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000588 try:
589 buf = self.cmp.decompress(buf)
590 except IOError:
591 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000592 t.append(buf)
593 c += len(buf)
594 t = "".join(t)
595 self.dbuf = t[size:]
596 return t[:size]
597
598 def __read(self, size):
599 """Return size bytes from stream. If internal buffer is empty,
600 read another block from the stream.
601 """
602 c = len(self.buf)
603 t = [self.buf]
604 while c < size:
605 buf = self.fileobj.read(self.bufsize)
606 if not buf:
607 break
608 t.append(buf)
609 c += len(buf)
610 t = "".join(t)
611 self.buf = t[size:]
612 return t[:size]
613# class _Stream
614
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000615class _StreamProxy(object):
616 """Small proxy class that enables transparent compression
617 detection for the Stream interface (mode 'r|*').
618 """
619
620 def __init__(self, fileobj):
621 self.fileobj = fileobj
622 self.buf = self.fileobj.read(BLOCKSIZE)
623
624 def read(self, size):
625 self.read = self.fileobj.read
626 return self.buf
627
628 def getcomptype(self):
629 if self.buf.startswith("\037\213\010"):
630 return "gz"
Lars Gustäbel9a388632011-12-06 13:07:09 +0100631 if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000632 return "bz2"
633 return "tar"
634
635 def close(self):
636 self.fileobj.close()
637# class StreamProxy
638
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000639class _BZ2Proxy(object):
640 """Small proxy class that enables external file object
641 support for "r:bz2" and "w:bz2" modes. This is actually
642 a workaround for a limitation in bz2 module's BZ2File
643 class which (unlike gzip.GzipFile) has no support for
644 a file object argument.
645 """
646
647 blocksize = 16 * 1024
648
649 def __init__(self, fileobj, mode):
650 self.fileobj = fileobj
651 self.mode = mode
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000652 self.name = getattr(self.fileobj, "name", None)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000653 self.init()
654
655 def init(self):
656 import bz2
657 self.pos = 0
658 if self.mode == "r":
659 self.bz2obj = bz2.BZ2Decompressor()
660 self.fileobj.seek(0)
661 self.buf = ""
662 else:
663 self.bz2obj = bz2.BZ2Compressor()
664
665 def read(self, size):
666 b = [self.buf]
667 x = len(self.buf)
668 while x < size:
Lars Gustäbel2020a592009-03-22 20:09:33 +0000669 raw = self.fileobj.read(self.blocksize)
670 if not raw:
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000671 break
Lars Gustäbel2020a592009-03-22 20:09:33 +0000672 data = self.bz2obj.decompress(raw)
673 b.append(data)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000674 x += len(data)
675 self.buf = "".join(b)
676
677 buf = self.buf[:size]
678 self.buf = self.buf[size:]
679 self.pos += len(buf)
680 return buf
681
682 def seek(self, pos):
683 if pos < self.pos:
684 self.init()
685 self.read(pos - self.pos)
686
687 def tell(self):
688 return self.pos
689
690 def write(self, data):
691 self.pos += len(data)
692 raw = self.bz2obj.compress(data)
693 self.fileobj.write(raw)
694
695 def close(self):
696 if self.mode == "w":
697 raw = self.bz2obj.flush()
698 self.fileobj.write(raw)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000699# class _BZ2Proxy
700
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000701#------------------------
702# Extraction file object
703#------------------------
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000704class _FileInFile(object):
705 """A thin wrapper around an existing file object that
706 provides a part of its data as an individual file
707 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708 """
709
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000710 def __init__(self, fileobj, offset, size, sparse=None):
711 self.fileobj = fileobj
712 self.offset = offset
713 self.size = size
714 self.sparse = sparse
715 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000716
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000717 def tell(self):
718 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000719 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000720 return self.position
721
722 def seek(self, position):
723 """Seek to a position in the file.
724 """
725 self.position = position
726
727 def read(self, size=None):
728 """Read data from the file.
729 """
730 if size is None:
731 size = self.size - self.position
732 else:
733 size = min(size, self.size - self.position)
734
735 if self.sparse is None:
736 return self.readnormal(size)
737 else:
738 return self.readsparse(size)
739
740 def readnormal(self, size):
741 """Read operation for regular files.
742 """
743 self.fileobj.seek(self.offset + self.position)
744 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000745 return self.fileobj.read(size)
746
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000747 def readsparse(self, size):
748 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000749 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000750 data = []
751 while size > 0:
752 buf = self.readsparsesection(size)
753 if not buf:
754 break
755 size -= len(buf)
756 data.append(buf)
757 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000758
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000759 def readsparsesection(self, size):
760 """Read a single section of a sparse file.
761 """
762 section = self.sparse.find(self.position)
763
764 if section is None:
765 return ""
766
767 size = min(size, section.offset + section.size - self.position)
768
769 if isinstance(section, _data):
770 realpos = section.realpos + self.position - section.offset
771 self.fileobj.seek(self.offset + realpos)
772 self.position += size
773 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000774 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000775 self.position += size
776 return NUL * size
777#class _FileInFile
778
779
780class ExFileObject(object):
781 """File-like object for reading an archive member.
782 Is returned by TarFile.extractfile().
783 """
784 blocksize = 1024
785
786 def __init__(self, tarfile, tarinfo):
787 self.fileobj = _FileInFile(tarfile.fileobj,
788 tarinfo.offset_data,
789 tarinfo.size,
790 getattr(tarinfo, "sparse", None))
791 self.name = tarinfo.name
792 self.mode = "r"
793 self.closed = False
794 self.size = tarinfo.size
795
796 self.position = 0
797 self.buffer = ""
798
799 def read(self, size=None):
800 """Read at most size bytes from the file. If size is not
801 present or None, read all data until EOF is reached.
802 """
803 if self.closed:
804 raise ValueError("I/O operation on closed file")
805
806 buf = ""
807 if self.buffer:
808 if size is None:
809 buf = self.buffer
810 self.buffer = ""
811 else:
812 buf = self.buffer[:size]
813 self.buffer = self.buffer[size:]
814
815 if size is None:
816 buf += self.fileobj.read()
817 else:
818 buf += self.fileobj.read(size - len(buf))
819
820 self.position += len(buf)
821 return buf
822
823 def readline(self, size=-1):
824 """Read one entire line from the file. If size is present
825 and non-negative, return a string with at most that
826 size, which may be an incomplete line.
827 """
828 if self.closed:
829 raise ValueError("I/O operation on closed file")
830
831 if "\n" in self.buffer:
832 pos = self.buffer.find("\n") + 1
833 else:
834 buffers = [self.buffer]
835 while True:
836 buf = self.fileobj.read(self.blocksize)
837 buffers.append(buf)
838 if not buf or "\n" in buf:
839 self.buffer = "".join(buffers)
840 pos = self.buffer.find("\n") + 1
841 if pos == 0:
842 # no newline found.
843 pos = len(self.buffer)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000844 break
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000845
846 if size != -1:
847 pos = min(size, pos)
848
849 buf = self.buffer[:pos]
850 self.buffer = self.buffer[pos:]
851 self.position += len(buf)
852 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000853
854 def readlines(self):
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000855 """Return a list with all remaining lines.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000856 """
857 result = []
858 while True:
859 line = self.readline()
860 if not line: break
861 result.append(line)
862 return result
863
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000864 def tell(self):
865 """Return the current file position.
866 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000867 if self.closed:
868 raise ValueError("I/O operation on closed file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000869
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000870 return self.position
871
872 def seek(self, pos, whence=os.SEEK_SET):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 """Seek to a position in the file.
874 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000875 if self.closed:
876 raise ValueError("I/O operation on closed file")
877
878 if whence == os.SEEK_SET:
879 self.position = min(max(pos, 0), self.size)
880 elif whence == os.SEEK_CUR:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000881 if pos < 0:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000882 self.position = max(self.position + pos, 0)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000883 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000884 self.position = min(self.position + pos, self.size)
885 elif whence == os.SEEK_END:
886 self.position = max(min(self.size + pos, self.size), 0)
887 else:
888 raise ValueError("Invalid argument")
889
890 self.buffer = ""
891 self.fileobj.seek(self.position)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000892
893 def close(self):
894 """Close the file object.
895 """
896 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000897
898 def __iter__(self):
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000899 """Get an iterator over the file's lines.
Martin v. Löwisdf241532005-03-03 08:17:42 +0000900 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000901 while True:
902 line = self.readline()
903 if not line:
904 break
905 yield line
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000906#class ExFileObject
907
908#------------------
909# Exported Classes
910#------------------
911class TarInfo(object):
912 """Informational class which holds the details about an
913 archive member given by a tar header block.
914 TarInfo objects are returned by TarFile.getmember(),
915 TarFile.getmembers() and TarFile.gettarinfo() and are
916 usually created internally.
917 """
918
919 def __init__(self, name=""):
920 """Construct a TarInfo object. name is the optional name
921 of the member.
922 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000923 self.name = name # member name
924 self.mode = 0644 # file permissions
Georg Brandl38c6a222006-05-10 16:26:03 +0000925 self.uid = 0 # user id
926 self.gid = 0 # group id
927 self.size = 0 # file size
928 self.mtime = 0 # modification time
929 self.chksum = 0 # header checksum
930 self.type = REGTYPE # member type
931 self.linkname = "" # link name
Lars Gustäbel6aab8d02010-10-04 15:37:53 +0000932 self.uname = "" # user name
933 self.gname = "" # group name
Georg Brandl38c6a222006-05-10 16:26:03 +0000934 self.devmajor = 0 # device major number
935 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000936
Georg Brandl38c6a222006-05-10 16:26:03 +0000937 self.offset = 0 # the tar header starts here
938 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000939
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000940 self.pax_headers = {} # pax header information
941
942 # In pax headers the "name" and "linkname" field are called
943 # "path" and "linkpath".
944 def _getpath(self):
945 return self.name
946 def _setpath(self, name):
947 self.name = name
948 path = property(_getpath, _setpath)
949
950 def _getlinkpath(self):
951 return self.linkname
952 def _setlinkpath(self, linkname):
953 self.linkname = linkname
954 linkpath = property(_getlinkpath, _setlinkpath)
955
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000956 def __repr__(self):
957 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
958
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000959 def get_info(self, encoding, errors):
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000960 """Return the TarInfo's attributes as a dictionary.
961 """
962 info = {
Lars Gustäbelf7cda522009-08-28 19:23:44 +0000963 "name": self.name,
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000964 "mode": self.mode & 07777,
965 "uid": self.uid,
966 "gid": self.gid,
967 "size": self.size,
968 "mtime": self.mtime,
969 "chksum": self.chksum,
970 "type": self.type,
Lars Gustäbelf7cda522009-08-28 19:23:44 +0000971 "linkname": self.linkname,
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000972 "uname": self.uname,
973 "gname": self.gname,
974 "devmajor": self.devmajor,
975 "devminor": self.devminor
976 }
977
978 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
979 info["name"] += "/"
980
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000981 for key in ("name", "linkname", "uname", "gname"):
982 if type(info[key]) is unicode:
983 info[key] = info[key].encode(encoding, errors)
984
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000985 return info
986
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000987 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000988 """Return a tar header as a string of 512 byte blocks.
989 """
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000990 info = self.get_info(encoding, errors)
991
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000992 if format == USTAR_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000993 return self.create_ustar_header(info)
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000994 elif format == GNU_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000995 return self.create_gnu_header(info)
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000996 elif format == PAX_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000997 return self.create_pax_header(info, encoding, errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000998 else:
999 raise ValueError("invalid format")
1000
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001001 def create_ustar_header(self, info):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001002 """Return the object as a ustar header block.
1003 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001004 info["magic"] = POSIX_MAGIC
1005
1006 if len(info["linkname"]) > LENGTH_LINK:
1007 raise ValueError("linkname is too long")
1008
1009 if len(info["name"]) > LENGTH_NAME:
1010 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1011
1012 return self._create_header(info, USTAR_FORMAT)
1013
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001014 def create_gnu_header(self, info):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001015 """Return the object as a GNU header block sequence.
1016 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001017 info["magic"] = GNU_MAGIC
1018
1019 buf = ""
1020 if len(info["linkname"]) > LENGTH_LINK:
1021 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1022
1023 if len(info["name"]) > LENGTH_NAME:
1024 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1025
1026 return buf + self._create_header(info, GNU_FORMAT)
1027
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001028 def create_pax_header(self, info, encoding, errors):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001029 """Return the object as a ustar header block. If it cannot be
1030 represented this way, prepend a pax extended header sequence
1031 with supplement information.
1032 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001033 info["magic"] = POSIX_MAGIC
1034 pax_headers = self.pax_headers.copy()
1035
1036 # Test string fields for values that exceed the field length or cannot
1037 # be represented in ASCII encoding.
1038 for name, hname, length in (
1039 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1040 ("uname", "uname", 32), ("gname", "gname", 32)):
1041
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001042 if hname in pax_headers:
1043 # The pax header has priority.
1044 continue
1045
1046 val = info[name].decode(encoding, errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001047
1048 # Try to encode the string as ASCII.
1049 try:
1050 val.encode("ascii")
1051 except UnicodeEncodeError:
1052 pax_headers[hname] = val
1053 continue
1054
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001055 if len(info[name]) > length:
1056 pax_headers[hname] = val
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001057
1058 # Test number fields for values that exceed the field limit or values
1059 # that like to be stored as float.
1060 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001061 if name in pax_headers:
1062 # The pax header has priority. Avoid overflow.
1063 info[name] = 0
1064 continue
1065
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001066 val = info[name]
1067 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1068 pax_headers[name] = unicode(val)
1069 info[name] = 0
1070
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001071 # Create a pax extended header if necessary.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001072 if pax_headers:
1073 buf = self._create_pax_generic_header(pax_headers)
1074 else:
1075 buf = ""
1076
1077 return buf + self._create_header(info, USTAR_FORMAT)
1078
1079 @classmethod
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001080 def create_pax_global_header(cls, pax_headers):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001081 """Return the object as a pax global header block sequence.
1082 """
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001083 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001084
1085 def _posix_split_name(self, name):
1086 """Split a name longer than 100 chars into a prefix
1087 and a name part.
1088 """
1089 prefix = name[:LENGTH_PREFIX + 1]
1090 while prefix and prefix[-1] != "/":
1091 prefix = prefix[:-1]
1092
1093 name = name[len(prefix):]
1094 prefix = prefix[:-1]
1095
1096 if not prefix or len(name) > LENGTH_NAME:
1097 raise ValueError("name is too long")
1098 return prefix, name
1099
1100 @staticmethod
1101 def _create_header(info, format):
1102 """Return a header block. info is a dictionary with file
1103 information, format must be one of the *_FORMAT constants.
1104 """
1105 parts = [
1106 stn(info.get("name", ""), 100),
1107 itn(info.get("mode", 0) & 07777, 8, format),
1108 itn(info.get("uid", 0), 8, format),
1109 itn(info.get("gid", 0), 8, format),
1110 itn(info.get("size", 0), 12, format),
1111 itn(info.get("mtime", 0), 12, format),
1112 " ", # checksum field
1113 info.get("type", REGTYPE),
1114 stn(info.get("linkname", ""), 100),
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001115 stn(info.get("magic", POSIX_MAGIC), 8),
Lars Gustäbel6aab8d02010-10-04 15:37:53 +00001116 stn(info.get("uname", ""), 32),
1117 stn(info.get("gname", ""), 32),
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001118 itn(info.get("devmajor", 0), 8, format),
1119 itn(info.get("devminor", 0), 8, format),
1120 stn(info.get("prefix", ""), 155)
1121 ]
1122
1123 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1124 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1125 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1126 return buf
1127
1128 @staticmethod
1129 def _create_payload(payload):
1130 """Return the string payload filled with zero bytes
1131 up to the next 512 byte border.
1132 """
1133 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1134 if remainder > 0:
1135 payload += (BLOCKSIZE - remainder) * NUL
1136 return payload
1137
1138 @classmethod
1139 def _create_gnu_long_header(cls, name, type):
1140 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1141 for name.
1142 """
1143 name += NUL
1144
1145 info = {}
1146 info["name"] = "././@LongLink"
1147 info["type"] = type
1148 info["size"] = len(name)
1149 info["magic"] = GNU_MAGIC
1150
1151 # create extended header + name blocks.
1152 return cls._create_header(info, USTAR_FORMAT) + \
1153 cls._create_payload(name)
1154
1155 @classmethod
1156 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1157 """Return a POSIX.1-2001 extended or global header sequence
1158 that contains a list of keyword, value pairs. The values
1159 must be unicode objects.
1160 """
1161 records = []
1162 for keyword, value in pax_headers.iteritems():
1163 keyword = keyword.encode("utf8")
1164 value = value.encode("utf8")
1165 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1166 n = p = 0
1167 while True:
1168 n = l + len(str(p))
1169 if n == p:
1170 break
1171 p = n
1172 records.append("%d %s=%s\n" % (p, keyword, value))
1173 records = "".join(records)
1174
1175 # We use a hardcoded "././@PaxHeader" name like star does
1176 # instead of the one that POSIX recommends.
1177 info = {}
1178 info["name"] = "././@PaxHeader"
1179 info["type"] = type
1180 info["size"] = len(records)
1181 info["magic"] = POSIX_MAGIC
1182
1183 # Create pax header + record blocks.
1184 return cls._create_header(info, USTAR_FORMAT) + \
1185 cls._create_payload(records)
1186
Guido van Rossum75b64e62005-01-16 00:16:11 +00001187 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001188 def frombuf(cls, buf):
1189 """Construct a TarInfo object from a 512 byte string buffer.
1190 """
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001191 if len(buf) == 0:
1192 raise EmptyHeaderError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +00001193 if len(buf) != BLOCKSIZE:
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001194 raise TruncatedHeaderError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +00001195 if buf.count(NUL) == BLOCKSIZE:
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001196 raise EOFHeaderError("end of file header")
Georg Brandlebbeed72006-12-19 22:06:46 +00001197
Georg Brandlded1c4d2006-12-20 11:55:16 +00001198 chksum = nti(buf[148:156])
Georg Brandlebbeed72006-12-19 22:06:46 +00001199 if chksum not in calc_chksums(buf):
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001200 raise InvalidHeaderError("bad checksum")
Georg Brandl38c6a222006-05-10 16:26:03 +00001201
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001202 obj = cls()
1203 obj.buf = buf
1204 obj.name = nts(buf[0:100])
1205 obj.mode = nti(buf[100:108])
1206 obj.uid = nti(buf[108:116])
1207 obj.gid = nti(buf[116:124])
1208 obj.size = nti(buf[124:136])
1209 obj.mtime = nti(buf[136:148])
1210 obj.chksum = chksum
1211 obj.type = buf[156:157]
1212 obj.linkname = nts(buf[157:257])
1213 obj.uname = nts(buf[265:297])
1214 obj.gname = nts(buf[297:329])
1215 obj.devmajor = nti(buf[329:337])
1216 obj.devminor = nti(buf[337:345])
1217 prefix = nts(buf[345:500])
Georg Brandl3354f282006-10-29 09:16:12 +00001218
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001219 # Old V7 tar format represents a directory as a regular
1220 # file with a trailing slash.
1221 if obj.type == AREGTYPE and obj.name.endswith("/"):
1222 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001223
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001224 # Remove redundant slashes from directories.
1225 if obj.isdir():
1226 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001227
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001228 # Reconstruct a ustar longname.
1229 if prefix and obj.type not in GNU_TYPES:
1230 obj.name = prefix + "/" + obj.name
1231 return obj
1232
1233 @classmethod
1234 def fromtarfile(cls, tarfile):
1235 """Return the next TarInfo object from TarFile object
1236 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001237 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001238 buf = tarfile.fileobj.read(BLOCKSIZE)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001239 obj = cls.frombuf(buf)
1240 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1241 return obj._proc_member(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001242
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001243 #--------------------------------------------------------------------------
1244 # The following are methods that are called depending on the type of a
1245 # member. The entry point is _proc_member() which can be overridden in a
1246 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1247 # implement the following
1248 # operations:
1249 # 1. Set self.offset_data to the position where the data blocks begin,
1250 # if there is data that follows.
1251 # 2. Set tarfile.offset to the position where the next member's header will
1252 # begin.
1253 # 3. Return self or another valid TarInfo object.
1254 def _proc_member(self, tarfile):
1255 """Choose the right processing method depending on
1256 the type and call it.
Georg Brandl3354f282006-10-29 09:16:12 +00001257 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001258 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1259 return self._proc_gnulong(tarfile)
1260 elif self.type == GNUTYPE_SPARSE:
1261 return self._proc_sparse(tarfile)
1262 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1263 return self._proc_pax(tarfile)
1264 else:
1265 return self._proc_builtin(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001266
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001267 def _proc_builtin(self, tarfile):
1268 """Process a builtin type or an unknown type which
1269 will be treated as a regular file.
1270 """
1271 self.offset_data = tarfile.fileobj.tell()
1272 offset = self.offset_data
1273 if self.isreg() or self.type not in SUPPORTED_TYPES:
1274 # Skip the following data blocks.
1275 offset += self._block(self.size)
1276 tarfile.offset = offset
Georg Brandl3354f282006-10-29 09:16:12 +00001277
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001278 # Patch the TarInfo object with saved global
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001279 # header information.
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001280 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001281
1282 return self
1283
1284 def _proc_gnulong(self, tarfile):
1285 """Process the blocks that hold a GNU longname
1286 or longlink member.
1287 """
1288 buf = tarfile.fileobj.read(self._block(self.size))
1289
1290 # Fetch the next header and process it.
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001291 try:
1292 next = self.fromtarfile(tarfile)
1293 except HeaderError:
1294 raise SubsequentHeaderError("missing or bad subsequent header")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001295
1296 # Patch the TarInfo object from the next header with
1297 # the longname information.
1298 next.offset = self.offset
1299 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001300 next.name = nts(buf)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001301 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001302 next.linkname = nts(buf)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001303
1304 return next
1305
1306 def _proc_sparse(self, tarfile):
1307 """Process a GNU sparse header plus extra headers.
1308 """
1309 buf = self.buf
1310 sp = _ringbuffer()
1311 pos = 386
1312 lastpos = 0L
1313 realpos = 0L
1314 # There are 4 possible sparse structs in the
1315 # first header.
1316 for i in xrange(4):
1317 try:
1318 offset = nti(buf[pos:pos + 12])
1319 numbytes = nti(buf[pos + 12:pos + 24])
1320 except ValueError:
1321 break
1322 if offset > lastpos:
1323 sp.append(_hole(lastpos, offset - lastpos))
1324 sp.append(_data(offset, numbytes, realpos))
1325 realpos += numbytes
1326 lastpos = offset + numbytes
1327 pos += 24
1328
1329 isextended = ord(buf[482])
1330 origsize = nti(buf[483:495])
1331
1332 # If the isextended flag is given,
1333 # there are extra headers to process.
1334 while isextended == 1:
1335 buf = tarfile.fileobj.read(BLOCKSIZE)
1336 pos = 0
1337 for i in xrange(21):
1338 try:
1339 offset = nti(buf[pos:pos + 12])
1340 numbytes = nti(buf[pos + 12:pos + 24])
1341 except ValueError:
1342 break
1343 if offset > lastpos:
1344 sp.append(_hole(lastpos, offset - lastpos))
1345 sp.append(_data(offset, numbytes, realpos))
1346 realpos += numbytes
1347 lastpos = offset + numbytes
1348 pos += 24
1349 isextended = ord(buf[504])
1350
1351 if lastpos < origsize:
1352 sp.append(_hole(lastpos, origsize - lastpos))
1353
1354 self.sparse = sp
1355
1356 self.offset_data = tarfile.fileobj.tell()
1357 tarfile.offset = self.offset_data + self._block(self.size)
1358 self.size = origsize
1359
1360 return self
1361
1362 def _proc_pax(self, tarfile):
1363 """Process an extended or global header as described in
1364 POSIX.1-2001.
1365 """
1366 # Read the header information.
1367 buf = tarfile.fileobj.read(self._block(self.size))
1368
1369 # A pax header stores supplemental information for either
1370 # the following file (extended) or all following files
1371 # (global).
1372 if self.type == XGLTYPE:
1373 pax_headers = tarfile.pax_headers
1374 else:
1375 pax_headers = tarfile.pax_headers.copy()
1376
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001377 # Parse pax header information. A record looks like that:
1378 # "%d %s=%s\n" % (length, keyword, value). length is the size
1379 # of the complete record including the length field itself and
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001380 # the newline. keyword and value are both UTF-8 encoded strings.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001381 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1382 pos = 0
1383 while True:
1384 match = regex.match(buf, pos)
1385 if not match:
1386 break
1387
1388 length, keyword = match.groups()
1389 length = int(length)
1390 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1391
1392 keyword = keyword.decode("utf8")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001393 value = value.decode("utf8")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001394
1395 pax_headers[keyword] = value
1396 pos += length
1397
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001398 # Fetch the next header.
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001399 try:
1400 next = self.fromtarfile(tarfile)
1401 except HeaderError:
1402 raise SubsequentHeaderError("missing or bad subsequent header")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001403
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001404 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001405 # Patch the TarInfo object with the extended header info.
1406 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1407 next.offset = self.offset
1408
Brett Cannon132fc542008-08-04 21:23:07 +00001409 if "size" in pax_headers:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001410 # If the extended header replaces the size field,
1411 # we need to recalculate the offset where the next
1412 # header starts.
1413 offset = next.offset_data
1414 if next.isreg() or next.type not in SUPPORTED_TYPES:
1415 offset += next._block(next.size)
1416 tarfile.offset = offset
1417
1418 return next
1419
1420 def _apply_pax_info(self, pax_headers, encoding, errors):
1421 """Replace fields with supplemental information from a previous
1422 pax extended or global header.
1423 """
1424 for keyword, value in pax_headers.iteritems():
1425 if keyword not in PAX_FIELDS:
1426 continue
1427
1428 if keyword == "path":
1429 value = value.rstrip("/")
1430
1431 if keyword in PAX_NUMBER_FIELDS:
1432 try:
1433 value = PAX_NUMBER_FIELDS[keyword](value)
1434 except ValueError:
1435 value = 0
1436 else:
1437 value = uts(value, encoding, errors)
1438
1439 setattr(self, keyword, value)
1440
1441 self.pax_headers = pax_headers.copy()
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001442
1443 def _block(self, count):
1444 """Round up a byte count by BLOCKSIZE and return it,
1445 e.g. _block(834) => 1024.
1446 """
1447 blocks, remainder = divmod(count, BLOCKSIZE)
1448 if remainder:
1449 blocks += 1
1450 return blocks * BLOCKSIZE
Georg Brandl3354f282006-10-29 09:16:12 +00001451
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001452 def isreg(self):
1453 return self.type in REGULAR_TYPES
1454 def isfile(self):
1455 return self.isreg()
1456 def isdir(self):
1457 return self.type == DIRTYPE
1458 def issym(self):
1459 return self.type == SYMTYPE
1460 def islnk(self):
1461 return self.type == LNKTYPE
1462 def ischr(self):
1463 return self.type == CHRTYPE
1464 def isblk(self):
1465 return self.type == BLKTYPE
1466 def isfifo(self):
1467 return self.type == FIFOTYPE
1468 def issparse(self):
1469 return self.type == GNUTYPE_SPARSE
1470 def isdev(self):
1471 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1472# class TarInfo
1473
1474class TarFile(object):
1475 """The TarFile Class provides an interface to tar archives.
1476 """
1477
1478 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1479
1480 dereference = False # If true, add content of linked file to the
1481 # tar file, else the link.
1482
1483 ignore_zeros = False # If true, skips empty or invalid blocks and
1484 # continues processing.
1485
Lars Gustäbel92ca7562009-12-13 11:32:27 +00001486 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001487 # messages (if debug >= 0). If > 0, errors
1488 # are passed to the caller as exceptions.
1489
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001490 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001491
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001492 encoding = ENCODING # Encoding for 8-bit character strings.
1493
1494 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001495
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001496 tarinfo = TarInfo # The default TarInfo class to use.
1497
1498 fileobject = ExFileObject # The default ExFileObject class to use.
1499
1500 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1501 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001502 errors=None, pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001503 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1504 read from an existing archive, 'a' to append data to an existing
1505 file or 'w' to create a new file overwriting an existing one. `mode'
1506 defaults to 'r'.
1507 If `fileobj' is given, it is used for reading or writing data. If it
1508 can be determined, `mode' is overridden by `fileobj's mode.
1509 `fileobj' is not closed, when TarFile is closed.
1510 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001511 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001512 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001513 self.mode = mode
1514 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001515
1516 if not fileobj:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001517 if self.mode == "a" and not os.path.exists(name):
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001518 # Create nonexistent files in append mode.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001519 self.mode = "w"
1520 self._mode = "wb"
Brett Cannon6cef0762007-05-25 20:17:15 +00001521 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001522 self._extfileobj = False
1523 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001524 if name is None and hasattr(fileobj, "name"):
1525 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001526 if hasattr(fileobj, "mode"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001527 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001528 self._extfileobj = True
Lars Gustäbel0f4a14b2007-08-28 12:31:09 +00001529 self.name = os.path.abspath(name) if name else None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001530 self.fileobj = fileobj
1531
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001532 # Init attributes.
1533 if format is not None:
1534 self.format = format
1535 if tarinfo is not None:
1536 self.tarinfo = tarinfo
1537 if dereference is not None:
1538 self.dereference = dereference
1539 if ignore_zeros is not None:
1540 self.ignore_zeros = ignore_zeros
1541 if encoding is not None:
1542 self.encoding = encoding
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001543
1544 if errors is not None:
1545 self.errors = errors
1546 elif mode == "r":
1547 self.errors = "utf-8"
1548 else:
1549 self.errors = "strict"
1550
1551 if pax_headers is not None and self.format == PAX_FORMAT:
1552 self.pax_headers = pax_headers
1553 else:
1554 self.pax_headers = {}
1555
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001556 if debug is not None:
1557 self.debug = debug
1558 if errorlevel is not None:
1559 self.errorlevel = errorlevel
1560
1561 # Init datastructures.
Georg Brandl38c6a222006-05-10 16:26:03 +00001562 self.closed = False
1563 self.members = [] # list of members as TarInfo objects
1564 self._loaded = False # flag if all members have been read
Lars Gustäbel77b2d632007-12-01 21:02:12 +00001565 self.offset = self.fileobj.tell()
1566 # current position in the archive file
Georg Brandl38c6a222006-05-10 16:26:03 +00001567 self.inodes = {} # dictionary caching the inodes of
1568 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001569
Lars Gustäbel355538e2009-11-18 20:24:54 +00001570 try:
1571 if self.mode == "r":
1572 self.firstmember = None
1573 self.firstmember = self.next()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001574
Lars Gustäbel355538e2009-11-18 20:24:54 +00001575 if self.mode == "a":
1576 # Move to the end of the archive,
1577 # before the first empty block.
Lars Gustäbel355538e2009-11-18 20:24:54 +00001578 while True:
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001579 self.fileobj.seek(self.offset)
1580 try:
1581 tarinfo = self.tarinfo.fromtarfile(self)
1582 self.members.append(tarinfo)
1583 except EOFHeaderError:
1584 self.fileobj.seek(self.offset)
Lars Gustäbel355538e2009-11-18 20:24:54 +00001585 break
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001586 except HeaderError, e:
1587 raise ReadError(str(e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001588
Lars Gustäbel355538e2009-11-18 20:24:54 +00001589 if self.mode in "aw":
1590 self._loaded = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001591
Lars Gustäbel355538e2009-11-18 20:24:54 +00001592 if self.pax_headers:
1593 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1594 self.fileobj.write(buf)
1595 self.offset += len(buf)
1596 except:
1597 if not self._extfileobj:
1598 self.fileobj.close()
1599 self.closed = True
1600 raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001601
1602 def _getposix(self):
1603 return self.format == USTAR_FORMAT
1604 def _setposix(self, value):
1605 import warnings
Philip Jenveyd846f1d2009-05-08 02:28:39 +00001606 warnings.warn("use the format attribute instead", DeprecationWarning,
1607 2)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001608 if value:
1609 self.format = USTAR_FORMAT
1610 else:
1611 self.format = GNU_FORMAT
1612 posix = property(_getposix, _setposix)
1613
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001614 #--------------------------------------------------------------------------
1615 # Below are the classmethods which act as alternate constructors to the
1616 # TarFile class. The open() method is the only one that is needed for
1617 # public use; it is the "super"-constructor and is able to select an
1618 # adequate "sub"-constructor for a particular compression using the mapping
1619 # from OPEN_METH.
1620 #
1621 # This concept allows one to subclass TarFile without losing the comfort of
1622 # the super-constructor. A sub-constructor is registered and made available
1623 # by adding it to the mapping in OPEN_METH.
1624
Guido van Rossum75b64e62005-01-16 00:16:11 +00001625 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001626 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001627 """Open a tar archive for reading, writing or appending. Return
1628 an appropriate TarFile class.
1629
1630 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001631 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001632 'r:' open for reading exclusively uncompressed
1633 'r:gz' open for reading with gzip compression
1634 'r:bz2' open for reading with bzip2 compression
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001635 'a' or 'a:' open for appending, creating the file if necessary
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001636 'w' or 'w:' open for writing without compression
1637 'w:gz' open for writing with gzip compression
1638 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001639
1640 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001641 'r|' open an uncompressed stream of tar blocks for reading
1642 'r|gz' open a gzip compressed stream of tar blocks
1643 'r|bz2' open a bzip2 compressed stream of tar blocks
1644 'w|' open an uncompressed stream for writing
1645 'w|gz' open a gzip compressed stream for writing
1646 'w|bz2' open a bzip2 compressed stream for writing
1647 """
1648
1649 if not name and not fileobj:
Georg Brandle4751e32006-05-18 06:11:19 +00001650 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001651
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001652 if mode in ("r", "r:*"):
1653 # Find out which *open() is appropriate for opening the file.
1654 for comptype in cls.OPEN_METH:
1655 func = getattr(cls, cls.OPEN_METH[comptype])
Lars Gustäbela7ba6fc2006-12-27 10:30:46 +00001656 if fileobj is not None:
1657 saved_pos = fileobj.tell()
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001658 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001659 return func(name, "r", fileobj, **kwargs)
1660 except (ReadError, CompressionError), e:
Lars Gustäbela7ba6fc2006-12-27 10:30:46 +00001661 if fileobj is not None:
1662 fileobj.seek(saved_pos)
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001663 continue
Georg Brandle4751e32006-05-18 06:11:19 +00001664 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001665
1666 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001667 filemode, comptype = mode.split(":", 1)
1668 filemode = filemode or "r"
1669 comptype = comptype or "tar"
1670
1671 # Select the *open() function according to
1672 # given compression.
1673 if comptype in cls.OPEN_METH:
1674 func = getattr(cls, cls.OPEN_METH[comptype])
1675 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001676 raise CompressionError("unknown compression type %r" % comptype)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001677 return func(name, filemode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001678
1679 elif "|" in mode:
1680 filemode, comptype = mode.split("|", 1)
1681 filemode = filemode or "r"
1682 comptype = comptype or "tar"
1683
1684 if filemode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001685 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001686
1687 t = cls(name, filemode,
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001688 _Stream(name, filemode, comptype, fileobj, bufsize),
1689 **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001690 t._extfileobj = False
1691 return t
1692
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001693 elif mode in "aw":
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001694 return cls.taropen(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001695
Georg Brandle4751e32006-05-18 06:11:19 +00001696 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001697
Guido van Rossum75b64e62005-01-16 00:16:11 +00001698 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001699 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001700 """Open uncompressed tar archive name for reading or writing.
1701 """
1702 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001703 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001704 return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001705
Guido van Rossum75b64e62005-01-16 00:16:11 +00001706 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001707 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001708 """Open gzip compressed tar archive name for reading or writing.
1709 Appending is not allowed.
1710 """
1711 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001712 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001713
1714 try:
1715 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001716 gzip.GzipFile
1717 except (ImportError, AttributeError):
Georg Brandle4751e32006-05-18 06:11:19 +00001718 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001719
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001720 if fileobj is None:
Brett Cannon6cef0762007-05-25 20:17:15 +00001721 fileobj = bltn_open(name, mode + "b")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001722
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001723 try:
Lars Gustäbela4b23812006-12-23 17:57:23 +00001724 t = cls.taropen(name, mode,
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001725 gzip.GzipFile(name, mode, compresslevel, fileobj),
1726 **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001727 except IOError:
Georg Brandle4751e32006-05-18 06:11:19 +00001728 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001729 t._extfileobj = False
1730 return t
1731
Guido van Rossum75b64e62005-01-16 00:16:11 +00001732 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001733 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001734 """Open bzip2 compressed tar archive name for reading or writing.
1735 Appending is not allowed.
1736 """
1737 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001738 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001739
1740 try:
1741 import bz2
1742 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +00001743 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001744
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001745 if fileobj is not None:
Georg Brandl49c8f4c2006-05-15 19:30:35 +00001746 fileobj = _BZ2Proxy(fileobj, mode)
1747 else:
1748 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001749
1750 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001751 t = cls.taropen(name, mode, fileobj, **kwargs)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001752 except (IOError, EOFError):
Georg Brandle4751e32006-05-18 06:11:19 +00001753 raise ReadError("not a bzip2 file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001754 t._extfileobj = False
1755 return t
1756
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001757 # All *open() methods are registered here.
1758 OPEN_METH = {
1759 "tar": "taropen", # uncompressed tar
1760 "gz": "gzopen", # gzip compressed tar
1761 "bz2": "bz2open" # bzip2 compressed tar
1762 }
1763
1764 #--------------------------------------------------------------------------
1765 # The public methods which TarFile provides:
1766
1767 def close(self):
1768 """Close the TarFile. In write-mode, two finishing zero blocks are
1769 appended to the archive.
1770 """
1771 if self.closed:
1772 return
1773
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001774 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001775 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1776 self.offset += (BLOCKSIZE * 2)
1777 # fill up the end with zero-blocks
1778 # (like option -b20 for tar does)
1779 blocks, remainder = divmod(self.offset, RECORDSIZE)
1780 if remainder > 0:
1781 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1782
1783 if not self._extfileobj:
1784 self.fileobj.close()
1785 self.closed = True
1786
1787 def getmember(self, name):
1788 """Return a TarInfo object for member `name'. If `name' can not be
1789 found in the archive, KeyError is raised. If a member occurs more
Mark Dickinson3e4caeb2009-02-21 20:27:01 +00001790 than once in the archive, its last occurrence is assumed to be the
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001791 most up-to-date version.
1792 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001793 tarinfo = self._getmember(name)
1794 if tarinfo is None:
Georg Brandle4751e32006-05-18 06:11:19 +00001795 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001796 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001797
1798 def getmembers(self):
1799 """Return the members of the archive as a list of TarInfo objects. The
1800 list has the same order as the members in the archive.
1801 """
1802 self._check()
1803 if not self._loaded: # if we want to obtain a list of
1804 self._load() # all members, we first have to
1805 # scan the whole archive.
1806 return self.members
1807
1808 def getnames(self):
1809 """Return the members of the archive as a list of their names. It has
1810 the same order as the list returned by getmembers().
1811 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001812 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001813
1814 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1815 """Create a TarInfo object for either the file `name' or the file
1816 object `fileobj' (using os.fstat on its file descriptor). You can
1817 modify some of the TarInfo's attributes before you add it using
1818 addfile(). If given, `arcname' specifies an alternative name for the
1819 file in the archive.
1820 """
1821 self._check("aw")
1822
1823 # When fileobj is given, replace name by
1824 # fileobj's real name.
1825 if fileobj is not None:
1826 name = fileobj.name
1827
1828 # Building the name of the member in the archive.
1829 # Backward slashes are converted to forward slashes,
1830 # Absolute paths are turned to relative paths.
1831 if arcname is None:
1832 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001833 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001834 arcname = arcname.replace(os.sep, "/")
1835 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001836
1837 # Now, fill the TarInfo object with
1838 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001839 tarinfo = self.tarinfo()
1840 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001841
1842 # Use os.stat or os.lstat, depending on platform
1843 # and if symlinks shall be resolved.
1844 if fileobj is None:
1845 if hasattr(os, "lstat") and not self.dereference:
1846 statres = os.lstat(name)
1847 else:
1848 statres = os.stat(name)
1849 else:
1850 statres = os.fstat(fileobj.fileno())
1851 linkname = ""
1852
1853 stmd = statres.st_mode
1854 if stat.S_ISREG(stmd):
1855 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001856 if not self.dereference and statres.st_nlink > 1 and \
1857 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001858 # Is it a hardlink to an already
1859 # archived file?
1860 type = LNKTYPE
1861 linkname = self.inodes[inode]
1862 else:
1863 # The inode is added only if its valid.
1864 # For win32 it is always 0.
1865 type = REGTYPE
1866 if inode[0]:
1867 self.inodes[inode] = arcname
1868 elif stat.S_ISDIR(stmd):
1869 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001870 elif stat.S_ISFIFO(stmd):
1871 type = FIFOTYPE
1872 elif stat.S_ISLNK(stmd):
1873 type = SYMTYPE
1874 linkname = os.readlink(name)
1875 elif stat.S_ISCHR(stmd):
1876 type = CHRTYPE
1877 elif stat.S_ISBLK(stmd):
1878 type = BLKTYPE
1879 else:
1880 return None
1881
1882 # Fill the TarInfo object with all
1883 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001884 tarinfo.name = arcname
1885 tarinfo.mode = stmd
1886 tarinfo.uid = statres.st_uid
1887 tarinfo.gid = statres.st_gid
Lars Gustäbel2ee9c6f2010-06-03 09:56:22 +00001888 if type == REGTYPE:
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001889 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001890 else:
1891 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001892 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001893 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001894 tarinfo.linkname = linkname
1895 if pwd:
1896 try:
1897 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1898 except KeyError:
1899 pass
1900 if grp:
1901 try:
1902 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1903 except KeyError:
1904 pass
1905
1906 if type in (CHRTYPE, BLKTYPE):
1907 if hasattr(os, "major") and hasattr(os, "minor"):
1908 tarinfo.devmajor = os.major(statres.st_rdev)
1909 tarinfo.devminor = os.minor(statres.st_rdev)
1910 return tarinfo
1911
1912 def list(self, verbose=True):
1913 """Print a table of contents to sys.stdout. If `verbose' is False, only
1914 the names of the members are printed. If it is True, an `ls -l'-like
1915 output is produced.
1916 """
1917 self._check()
1918
1919 for tarinfo in self:
1920 if verbose:
1921 print filemode(tarinfo.mode),
1922 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1923 tarinfo.gname or tarinfo.gid),
1924 if tarinfo.ischr() or tarinfo.isblk():
1925 print "%10s" % ("%d,%d" \
1926 % (tarinfo.devmajor, tarinfo.devminor)),
1927 else:
1928 print "%10d" % tarinfo.size,
1929 print "%d-%02d-%02d %02d:%02d:%02d" \
1930 % time.localtime(tarinfo.mtime)[:6],
1931
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001932 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001933
1934 if verbose:
1935 if tarinfo.issym():
1936 print "->", tarinfo.linkname,
1937 if tarinfo.islnk():
1938 print "link to", tarinfo.linkname,
1939 print
1940
Lars Gustäbel21121e62009-09-12 10:28:15 +00001941 def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001942 """Add the file `name' to the archive. `name' may be any type of file
1943 (directory, fifo, symbolic link, etc.). If given, `arcname'
1944 specifies an alternative name for the file in the archive.
1945 Directories are added recursively by default. This can be avoided by
Lars Gustäbel104490e2007-06-18 11:42:11 +00001946 setting `recursive' to False. `exclude' is a function that should
Lars Gustäbel21121e62009-09-12 10:28:15 +00001947 return True for each filename to be excluded. `filter' is a function
1948 that expects a TarInfo object argument and returns the changed
1949 TarInfo object, if it returns None the TarInfo object will be
1950 excluded from the archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001951 """
1952 self._check("aw")
1953
1954 if arcname is None:
1955 arcname = name
1956
Lars Gustäbel104490e2007-06-18 11:42:11 +00001957 # Exclude pathnames.
Lars Gustäbel21121e62009-09-12 10:28:15 +00001958 if exclude is not None:
1959 import warnings
1960 warnings.warn("use the filter argument instead",
1961 DeprecationWarning, 2)
1962 if exclude(name):
1963 self._dbg(2, "tarfile: Excluded %r" % name)
1964 return
Lars Gustäbel104490e2007-06-18 11:42:11 +00001965
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001966 # Skip if somebody tries to archive the archive...
Lars Gustäbela4b23812006-12-23 17:57:23 +00001967 if self.name is not None and os.path.abspath(name) == self.name:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001968 self._dbg(2, "tarfile: Skipped %r" % name)
1969 return
1970
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001971 self._dbg(1, name)
1972
1973 # Create a TarInfo object from the file.
1974 tarinfo = self.gettarinfo(name, arcname)
1975
1976 if tarinfo is None:
1977 self._dbg(1, "tarfile: Unsupported type %r" % name)
1978 return
1979
Lars Gustäbel21121e62009-09-12 10:28:15 +00001980 # Change or exclude the TarInfo object.
1981 if filter is not None:
1982 tarinfo = filter(tarinfo)
1983 if tarinfo is None:
1984 self._dbg(2, "tarfile: Excluded %r" % name)
1985 return
1986
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001987 # Append the tar header and data to the archive.
1988 if tarinfo.isreg():
Andrew Svetlovac26a2e2012-11-29 14:22:26 +02001989 with bltn_open(name, "rb") as f:
1990 self.addfile(tarinfo, f)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001991
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001992 elif tarinfo.isdir():
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001993 self.addfile(tarinfo)
1994 if recursive:
1995 for f in os.listdir(name):
Lars Gustäbel21121e62009-09-12 10:28:15 +00001996 self.add(os.path.join(name, f), os.path.join(arcname, f),
1997 recursive, exclude, filter)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001998
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001999 else:
2000 self.addfile(tarinfo)
2001
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002002 def addfile(self, tarinfo, fileobj=None):
2003 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2004 given, tarinfo.size bytes are read from it and added to the archive.
2005 You can create TarInfo objects using gettarinfo().
2006 On Windows platforms, `fileobj' should always be opened with mode
2007 'rb' to avoid irritation about the file size.
2008 """
2009 self._check("aw")
2010
Georg Brandl3354f282006-10-29 09:16:12 +00002011 tarinfo = copy.copy(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002012
Lars Gustäbela0fcb932007-05-27 19:49:30 +00002013 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
Georg Brandl3354f282006-10-29 09:16:12 +00002014 self.fileobj.write(buf)
2015 self.offset += len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002016
2017 # If there's data to follow, append it.
2018 if fileobj is not None:
2019 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2020 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2021 if remainder > 0:
2022 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2023 blocks += 1
2024 self.offset += blocks * BLOCKSIZE
2025
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002026 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002027
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002028 def extractall(self, path=".", members=None):
2029 """Extract all members from the archive to the current working
2030 directory and set owner, modification time and permissions on
2031 directories afterwards. `path' specifies a different directory
2032 to extract to. `members' is optional and must be a subset of the
2033 list returned by getmembers().
2034 """
2035 directories = []
2036
2037 if members is None:
2038 members = self
2039
2040 for tarinfo in members:
2041 if tarinfo.isdir():
Lars Gustäbel0192e432008-02-05 11:51:40 +00002042 # Extract directories with a safe mode.
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002043 directories.append(tarinfo)
Lars Gustäbel0192e432008-02-05 11:51:40 +00002044 tarinfo = copy.copy(tarinfo)
2045 tarinfo.mode = 0700
2046 self.extract(tarinfo, path)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002047
2048 # Reverse sort directories.
Brett Cannon132fc542008-08-04 21:23:07 +00002049 directories.sort(key=operator.attrgetter('name'))
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002050 directories.reverse()
2051
2052 # Set correct owner, mtime and filemode on directories.
2053 for tarinfo in directories:
Lars Gustäbel2ee1c762008-01-04 14:00:33 +00002054 dirpath = os.path.join(path, tarinfo.name)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002055 try:
Lars Gustäbel2ee1c762008-01-04 14:00:33 +00002056 self.chown(tarinfo, dirpath)
2057 self.utime(tarinfo, dirpath)
2058 self.chmod(tarinfo, dirpath)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002059 except ExtractError, e:
2060 if self.errorlevel > 1:
2061 raise
2062 else:
2063 self._dbg(1, "tarfile: %s" % e)
2064
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002065 def extract(self, member, path=""):
2066 """Extract a member from the archive to the current working directory,
2067 using its full name. Its file information is extracted as accurately
2068 as possible. `member' may be a filename or a TarInfo object. You can
2069 specify a different directory using `path'.
2070 """
2071 self._check("r")
2072
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002073 if isinstance(member, basestring):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002074 tarinfo = self.getmember(member)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002075 else:
2076 tarinfo = member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002077
Neal Norwitza4f651a2004-07-20 22:07:44 +00002078 # Prepare the link target for makelink().
2079 if tarinfo.islnk():
2080 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2081
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002082 try:
2083 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2084 except EnvironmentError, e:
2085 if self.errorlevel > 0:
2086 raise
2087 else:
2088 if e.filename is None:
2089 self._dbg(1, "tarfile: %s" % e.strerror)
2090 else:
2091 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2092 except ExtractError, e:
2093 if self.errorlevel > 1:
2094 raise
2095 else:
2096 self._dbg(1, "tarfile: %s" % e)
2097
2098 def extractfile(self, member):
2099 """Extract a member from the archive as a file object. `member' may be
2100 a filename or a TarInfo object. If `member' is a regular file, a
2101 file-like object is returned. If `member' is a link, a file-like
2102 object is constructed from the link's target. If `member' is none of
2103 the above, None is returned.
2104 The file-like object is read-only and provides the following
2105 methods: read(), readline(), readlines(), seek() and tell()
2106 """
2107 self._check("r")
2108
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002109 if isinstance(member, basestring):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002110 tarinfo = self.getmember(member)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002111 else:
2112 tarinfo = member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002113
2114 if tarinfo.isreg():
2115 return self.fileobject(self, tarinfo)
2116
2117 elif tarinfo.type not in SUPPORTED_TYPES:
2118 # If a member's type is unknown, it is treated as a
2119 # regular file.
2120 return self.fileobject(self, tarinfo)
2121
2122 elif tarinfo.islnk() or tarinfo.issym():
2123 if isinstance(self.fileobj, _Stream):
2124 # A small but ugly workaround for the case that someone tries
2125 # to extract a (sym)link as a file-object from a non-seekable
2126 # stream of tar blocks.
Georg Brandle4751e32006-05-18 06:11:19 +00002127 raise StreamError("cannot extract (sym)link as file object")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002128 else:
Georg Brandl7eb4b7d2005-07-22 21:49:32 +00002129 # A (sym)link's file object is its target's file object.
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002130 return self.extractfile(self._find_link_target(tarinfo))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002131 else:
2132 # If there's no data associated with the member (directory, chrdev,
2133 # blkdev, etc.), return None instead of a file object.
2134 return None
2135
2136 def _extract_member(self, tarinfo, targetpath):
2137 """Extract the TarInfo object tarinfo to a physical
2138 file called targetpath.
2139 """
2140 # Fetch the TarInfo object for the given name
2141 # and build the destination pathname, replacing
2142 # forward slashes to platform specific separators.
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002143 targetpath = targetpath.rstrip("/")
2144 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002145
2146 # Create all upper directories.
2147 upperdirs = os.path.dirname(targetpath)
2148 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel0192e432008-02-05 11:51:40 +00002149 # Create directories that are not part of the archive with
2150 # default permissions.
Lars Gustäbeld2e22902007-01-23 11:17:33 +00002151 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002152
2153 if tarinfo.islnk() or tarinfo.issym():
2154 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2155 else:
2156 self._dbg(1, tarinfo.name)
2157
2158 if tarinfo.isreg():
2159 self.makefile(tarinfo, targetpath)
2160 elif tarinfo.isdir():
2161 self.makedir(tarinfo, targetpath)
2162 elif tarinfo.isfifo():
2163 self.makefifo(tarinfo, targetpath)
2164 elif tarinfo.ischr() or tarinfo.isblk():
2165 self.makedev(tarinfo, targetpath)
2166 elif tarinfo.islnk() or tarinfo.issym():
2167 self.makelink(tarinfo, targetpath)
2168 elif tarinfo.type not in SUPPORTED_TYPES:
2169 self.makeunknown(tarinfo, targetpath)
2170 else:
2171 self.makefile(tarinfo, targetpath)
2172
2173 self.chown(tarinfo, targetpath)
2174 if not tarinfo.issym():
2175 self.chmod(tarinfo, targetpath)
2176 self.utime(tarinfo, targetpath)
2177
2178 #--------------------------------------------------------------------------
2179 # Below are the different file methods. They are called via
2180 # _extract_member() when extract() is called. They can be replaced in a
2181 # subclass to implement other functionality.
2182
2183 def makedir(self, tarinfo, targetpath):
2184 """Make a directory called targetpath.
2185 """
2186 try:
Lars Gustäbel0192e432008-02-05 11:51:40 +00002187 # Use a safe mode for the directory, the real mode is set
2188 # later in _extract_member().
2189 os.mkdir(targetpath, 0700)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002190 except EnvironmentError, e:
2191 if e.errno != errno.EEXIST:
2192 raise
2193
2194 def makefile(self, tarinfo, targetpath):
2195 """Make a file called targetpath.
2196 """
2197 source = self.extractfile(tarinfo)
Andrew Svetlovac26a2e2012-11-29 14:22:26 +02002198 try:
2199 with bltn_open(targetpath, "wb") as target:
2200 copyfileobj(source, target)
2201 finally:
2202 source.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002203
2204 def makeunknown(self, tarinfo, targetpath):
2205 """Make a file from a TarInfo object with an unknown type
2206 at targetpath.
2207 """
2208 self.makefile(tarinfo, targetpath)
2209 self._dbg(1, "tarfile: Unknown file type %r, " \
2210 "extracted as regular file." % tarinfo.type)
2211
2212 def makefifo(self, tarinfo, targetpath):
2213 """Make a fifo called targetpath.
2214 """
2215 if hasattr(os, "mkfifo"):
2216 os.mkfifo(targetpath)
2217 else:
Georg Brandle4751e32006-05-18 06:11:19 +00002218 raise ExtractError("fifo not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002219
2220 def makedev(self, tarinfo, targetpath):
2221 """Make a character or block device called targetpath.
2222 """
2223 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
Georg Brandle4751e32006-05-18 06:11:19 +00002224 raise ExtractError("special devices not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002225
2226 mode = tarinfo.mode
2227 if tarinfo.isblk():
2228 mode |= stat.S_IFBLK
2229 else:
2230 mode |= stat.S_IFCHR
2231
2232 os.mknod(targetpath, mode,
2233 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2234
2235 def makelink(self, tarinfo, targetpath):
2236 """Make a (symbolic) link called targetpath. If it cannot be created
2237 (platform limitation), we try to make a copy of the referenced file
2238 instead of a link.
2239 """
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002240 if hasattr(os, "symlink") and hasattr(os, "link"):
2241 # For systems that support symbolic and hard links.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002242 if tarinfo.issym():
Senthil Kumaran4dd89ce2011-05-17 10:12:18 +08002243 if os.path.lexists(targetpath):
Senthil Kumaran011525e2011-04-28 15:30:31 +08002244 os.unlink(targetpath)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002245 os.symlink(tarinfo.linkname, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002246 else:
Neal Norwitza4f651a2004-07-20 22:07:44 +00002247 # See extract().
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002248 if os.path.exists(tarinfo._link_target):
Senthil Kumaran4dd89ce2011-05-17 10:12:18 +08002249 if os.path.lexists(targetpath):
2250 os.unlink(targetpath)
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002251 os.link(tarinfo._link_target, targetpath)
2252 else:
2253 self._extract_member(self._find_link_target(tarinfo), targetpath)
2254 else:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002255 try:
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002256 self._extract_member(self._find_link_target(tarinfo), targetpath)
2257 except KeyError:
2258 raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002259
2260 def chown(self, tarinfo, targetpath):
2261 """Set owner of targetpath according to tarinfo.
2262 """
2263 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2264 # We have to be root to do so.
2265 try:
2266 g = grp.getgrnam(tarinfo.gname)[2]
2267 except KeyError:
Lars Gustäbel8babfdf2011-09-05 17:04:18 +02002268 g = tarinfo.gid
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002269 try:
2270 u = pwd.getpwnam(tarinfo.uname)[2]
2271 except KeyError:
Lars Gustäbel8babfdf2011-09-05 17:04:18 +02002272 u = tarinfo.uid
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002273 try:
2274 if tarinfo.issym() and hasattr(os, "lchown"):
2275 os.lchown(targetpath, u, g)
2276 else:
Andrew MacIntyre7970d202003-02-19 12:51:34 +00002277 if sys.platform != "os2emx":
2278 os.chown(targetpath, u, g)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002279 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002280 raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002281
2282 def chmod(self, tarinfo, targetpath):
2283 """Set file permissions of targetpath according to tarinfo.
2284 """
Jack Jansen834eff62003-03-07 12:47:06 +00002285 if hasattr(os, 'chmod'):
2286 try:
2287 os.chmod(targetpath, tarinfo.mode)
2288 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002289 raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002290
2291 def utime(self, tarinfo, targetpath):
2292 """Set modification time of targetpath according to tarinfo.
2293 """
Jack Jansen834eff62003-03-07 12:47:06 +00002294 if not hasattr(os, 'utime'):
Tim Petersf9347782003-03-07 15:36:41 +00002295 return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002296 try:
2297 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2298 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002299 raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002300
2301 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002302 def next(self):
2303 """Return the next member of the archive as a TarInfo object, when
2304 TarFile is opened for reading. Return None if there is no more
2305 available.
2306 """
2307 self._check("ra")
2308 if self.firstmember is not None:
2309 m = self.firstmember
2310 self.firstmember = None
2311 return m
2312
2313 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002314 self.fileobj.seek(self.offset)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002315 tarinfo = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002316 while True:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002317 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002318 tarinfo = self.tarinfo.fromtarfile(self)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002319 except EOFHeaderError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002320 if self.ignore_zeros:
Georg Brandlebbeed72006-12-19 22:06:46 +00002321 self._dbg(2, "0x%X: %s" % (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002322 self.offset += BLOCKSIZE
2323 continue
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002324 except InvalidHeaderError, e:
2325 if self.ignore_zeros:
2326 self._dbg(2, "0x%X: %s" % (self.offset, e))
2327 self.offset += BLOCKSIZE
2328 continue
2329 elif self.offset == 0:
2330 raise ReadError(str(e))
2331 except EmptyHeaderError:
2332 if self.offset == 0:
2333 raise ReadError("empty file")
2334 except TruncatedHeaderError, e:
2335 if self.offset == 0:
2336 raise ReadError(str(e))
2337 except SubsequentHeaderError, e:
2338 raise ReadError(str(e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002339 break
2340
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002341 if tarinfo is not None:
2342 self.members.append(tarinfo)
2343 else:
2344 self._loaded = True
2345
Georg Brandl38c6a222006-05-10 16:26:03 +00002346 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002347
2348 #--------------------------------------------------------------------------
2349 # Little helper methods:
2350
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002351 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002352 """Find an archive member by name from bottom to top.
2353 If tarinfo is given, it is used as the starting point.
2354 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002355 # Ensure that all members have been loaded.
2356 members = self.getmembers()
2357
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002358 # Limit the member search list up to tarinfo.
2359 if tarinfo is not None:
2360 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002361
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002362 if normalize:
2363 name = os.path.normpath(name)
2364
2365 for member in reversed(members):
2366 if normalize:
2367 member_name = os.path.normpath(member.name)
2368 else:
2369 member_name = member.name
2370
2371 if name == member_name:
2372 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002373
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002374 def _load(self):
2375 """Read through the entire archive file and look for readable
2376 members.
2377 """
2378 while True:
2379 tarinfo = self.next()
2380 if tarinfo is None:
2381 break
2382 self._loaded = True
2383
2384 def _check(self, mode=None):
2385 """Check if TarFile is still open, and if the operation's mode
2386 corresponds to TarFile's mode.
2387 """
2388 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002389 raise IOError("%s is closed" % self.__class__.__name__)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002390 if mode is not None and self.mode not in mode:
2391 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002392
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002393 def _find_link_target(self, tarinfo):
2394 """Find the target member of a symlink or hardlink member in the
2395 archive.
2396 """
2397 if tarinfo.issym():
2398 # Always search the entire archive.
Lars Gustäbel231d4742012-04-24 22:42:08 +02002399 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002400 limit = None
2401 else:
2402 # Search the archive before the link, because a hard link is
2403 # just a reference to an already archived file.
2404 linkname = tarinfo.linkname
2405 limit = tarinfo
2406
2407 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2408 if member is None:
2409 raise KeyError("linkname %r not found" % linkname)
2410 return member
2411
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002412 def __iter__(self):
2413 """Provide an iterator object.
2414 """
2415 if self._loaded:
2416 return iter(self.members)
2417 else:
2418 return TarIter(self)
2419
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002420 def _dbg(self, level, msg):
2421 """Write debugging output to sys.stderr.
2422 """
2423 if level <= self.debug:
2424 print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002425
2426 def __enter__(self):
2427 self._check()
2428 return self
2429
2430 def __exit__(self, type, value, traceback):
2431 if type is None:
2432 self.close()
2433 else:
2434 # An exception occurred. We must not call close() because
2435 # it would try to write end-of-archive blocks and padding.
2436 if not self._extfileobj:
2437 self.fileobj.close()
2438 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002439# class TarFile
2440
2441class TarIter:
2442 """Iterator Class.
2443
2444 for tarinfo in TarFile(...):
2445 suite...
2446 """
2447
2448 def __init__(self, tarfile):
2449 """Construct a TarIter object.
2450 """
2451 self.tarfile = tarfile
Martin v. Löwis637431b2005-03-03 23:12:42 +00002452 self.index = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002453 def __iter__(self):
2454 """Return iterator object.
2455 """
2456 return self
2457 def next(self):
2458 """Return the next item using TarFile's next() method.
2459 When all members have been read, set TarFile as _loaded.
2460 """
Martin v. Löwis637431b2005-03-03 23:12:42 +00002461 # Fix for SF #1100429: Under rare circumstances it can
2462 # happen that getmembers() is called during iteration,
2463 # which will cause TarIter to stop prematurely.
Serhiy Storchakace34ba62013-05-09 14:22:05 +03002464
2465 if self.index == 0 and self.tarfile.firstmember is not None:
2466 tarinfo = self.tarfile.next()
2467 elif self.index < len(self.tarfile.members):
2468 tarinfo = self.tarfile.members[self.index]
2469 elif not self.tarfile._loaded:
Martin v. Löwis637431b2005-03-03 23:12:42 +00002470 tarinfo = self.tarfile.next()
2471 if not tarinfo:
2472 self.tarfile._loaded = True
2473 raise StopIteration
2474 else:
Serhiy Storchakace34ba62013-05-09 14:22:05 +03002475 raise StopIteration
Martin v. Löwis637431b2005-03-03 23:12:42 +00002476 self.index += 1
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002477 return tarinfo
2478
2479# Helper classes for sparse file support
2480class _section:
2481 """Base class for _data and _hole.
2482 """
2483 def __init__(self, offset, size):
2484 self.offset = offset
2485 self.size = size
2486 def __contains__(self, offset):
2487 return self.offset <= offset < self.offset + self.size
2488
2489class _data(_section):
2490 """Represent a data section in a sparse file.
2491 """
2492 def __init__(self, offset, size, realpos):
2493 _section.__init__(self, offset, size)
2494 self.realpos = realpos
2495
2496class _hole(_section):
2497 """Represent a hole section in a sparse file.
2498 """
2499 pass
2500
2501class _ringbuffer(list):
2502 """Ringbuffer class which increases performance
2503 over a regular list.
2504 """
2505 def __init__(self):
2506 self.idx = 0
2507 def find(self, offset):
2508 idx = self.idx
2509 while True:
2510 item = self[idx]
2511 if offset in item:
2512 break
2513 idx += 1
2514 if idx == len(self):
2515 idx = 0
2516 if idx == self.idx:
2517 # End of File
2518 return None
2519 self.idx = idx
2520 return item
2521
2522#---------------------------------------------
2523# zipfile compatible TarFile class
2524#---------------------------------------------
2525TAR_PLAIN = 0 # zipfile.ZIP_STORED
2526TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
2527class TarFileCompat:
2528 """TarFile class compatible with standard module zipfile's
2529 ZipFile class.
2530 """
2531 def __init__(self, file, mode="r", compression=TAR_PLAIN):
Lars Gustäbel727bd0b2008-08-02 11:26:39 +00002532 from warnings import warnpy3k
2533 warnpy3k("the TarFileCompat class has been removed in Python 3.0",
2534 stacklevel=2)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002535 if compression == TAR_PLAIN:
2536 self.tarfile = TarFile.taropen(file, mode)
2537 elif compression == TAR_GZIPPED:
2538 self.tarfile = TarFile.gzopen(file, mode)
2539 else:
Georg Brandle4751e32006-05-18 06:11:19 +00002540 raise ValueError("unknown compression constant")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002541 if mode[0:1] == "r":
2542 members = self.tarfile.getmembers()
Raymond Hettingera1d09e22005-09-11 16:34:05 +00002543 for m in members:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002544 m.filename = m.name
2545 m.file_size = m.size
2546 m.date_time = time.gmtime(m.mtime)[:6]
2547 def namelist(self):
2548 return map(lambda m: m.name, self.infolist())
2549 def infolist(self):
2550 return filter(lambda m: m.type in REGULAR_TYPES,
2551 self.tarfile.getmembers())
2552 def printdir(self):
2553 self.tarfile.list()
2554 def testzip(self):
2555 return
2556 def getinfo(self, name):
2557 return self.tarfile.getmember(name)
2558 def read(self, name):
2559 return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2560 def write(self, filename, arcname=None, compress_type=None):
2561 self.tarfile.add(filename, arcname)
2562 def writestr(self, zinfo, bytes):
Raymond Hettingera6172712004-12-31 19:15:26 +00002563 try:
2564 from cStringIO import StringIO
2565 except ImportError:
2566 from StringIO import StringIO
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002567 import calendar
Lars Gustäbel727bd0b2008-08-02 11:26:39 +00002568 tinfo = TarInfo(zinfo.filename)
2569 tinfo.size = len(bytes)
2570 tinfo.mtime = calendar.timegm(zinfo.date_time)
2571 self.tarfile.addfile(tinfo, StringIO(bytes))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002572 def close(self):
2573 self.tarfile.close()
2574#class TarFileCompat
2575
2576#--------------------
2577# exported functions
2578#--------------------
2579def is_tarfile(name):
2580 """Return True if name points to a tar archive that we
2581 are able to handle, else return False.
2582 """
2583 try:
2584 t = open(name)
2585 t.close()
2586 return True
2587 except TarError:
2588 return False
2589
Brett Cannon6cef0762007-05-25 20:17:15 +00002590bltn_open = open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002591open = TarFile.open