blob: b0d1292783381e49c7667da3c0233240f9e9cc8b [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001# -*- coding: iso-8859-1 -*-
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.  The "$...$" values are keyword-expansion style
# strings kept verbatim.
__version__ = "$Revision: 85213 $"
# $Source$

version = "0.9.0"
__author__ = "Lars Gustäbel (lars@gustaebel.de)"
__date__ = "$Date$"
__cvsid__ = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
40
41#---------
42# Imports
43#---------
44import sys
45import os
46import shutil
47import stat
48import errno
49import time
50import struct
Georg Brandl3354f282006-10-29 09:16:12 +000051import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000052import re
Brett Cannon132fc542008-08-04 21:23:07 +000053import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
55try:
56 import grp, pwd
57except ImportError:
58 grp = pwd = None
59
# Names made available by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
]
62
63#---------------------------------------------------------
64# tar constants
65#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# The magic and version fields of a ustar header occupy 8 bytes
# together, so both magic strings below must be exactly 8 characters
# long.  The GNU variant is "ustar" followed by TWO spaces and a NUL.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the type flag of a member.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers of the archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000098
99#---------------------------------------------------------
100# tarfile constants
101#---------------------------------------------------------
# Member types that tarfile is able to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that belong to the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Pax header fields that override an attribute of a TarInfo object.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header fields holding numbers, mapped to the type used to convert
# them; every field not listed here is treated as a string.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
131
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000132#---------------------------------------------------------
133# Bits used in the mode field, values in octal.
134#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000          # symbolic link
S_IFREG = 0o100000          # regular file
S_IFBLK = 0o060000          # block device
S_IFDIR = 0o040000          # directory
S_IFCHR = 0o020000          # character device
S_IFIFO = 0o010000          # fifo

# Special permission bits.
TSUID = 0o4000              # set UID on execution
TSGID = 0o2000              # set GID on execution
TSVTX = 0o1000              # reserved

# Permission bits for owner, group and others.
TUREAD = 0o400              # read by owner
TUWRITE = 0o200             # write by owner
TUEXEC = 0o100              # execute/search by owner
TGREAD = 0o040              # read by group
TGWRITE = 0o020             # write by group
TGEXEC = 0o010              # execute/search by group
TOREAD = 0o004              # read by other
TOWRITE = 0o002             # write by other
TOEXEC = 0o001              # execute/search by other
155
156#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000157# initialization
158#---------------------------------------------------------
# Default encoding: the filesystem encoding, or the interpreter's
# default encoding when the filesystem encoding is not known.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000162
163#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164# Some useful functions
165#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000166
def stn(s, length):
    """Return s as a string field of exactly length characters.

       s is cut off after length characters; a shorter s is filled up
       with null characters.
    """
    return s[:length].ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000171
def nts(s):
    """Return the part of the string field s that precedes its first
       null character, or all of s if it contains none.
    """
    head, _sep, _tail = s.partition("\0")
    return head
180
def nti(s):
    """Convert a number field to a python number.

       Two encodings are understood (see itn()):

       - a string of octal digits terminated by a null character; an
         empty field counts as 0,
       - the GNU tar base-256 encoding: a marker byte followed by the
         big-endian value in the remaining bytes. The marker 0200
         introduces a non-negative number, the marker 0377 a negative
         number in two's complement.

       Raise InvalidHeaderError if an octal field cannot be parsed.
    """
    marker = s[0]
    if marker in (chr(0o200), chr(0o377)):
        n = 0
        for c in s[1:]:
            n = (n << 8) + ord(c)
        if marker == chr(0o377):
            # The payload holds the value modulo 256**(len(s)-1).
            n -= 256 ** (len(s) - 1)
        return n

    try:
        return int(nts(s) or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
197
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits characters.

       POSIX 1003.1-1988 requires numbers to be encoded as a string of
       octal digits followed by a null-byte, which allows values from 0
       up to (8**(digits-1))-1. GNU tar can store numbers outside of
       that range in a base-256 encoding: a marker byte followed by a
       big-endian representation in the remaining digits-1 bytes. The
       marker is 0200 for non-negative numbers and 0377 for negative
       numbers, which are stored in two's complement. This covers
       -(256**(digits-1)) up to (256**(digits-1))-1.

       Raise ValueError if n does not fit into the field or if it
       needs the base-256 encoding and format is not GNU_FORMAT.
    """
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    limit = 256 ** (digits - 1)
    if format != GNU_FORMAT or not -limit <= n < limit:
        raise ValueError("overflow in number field")

    if n >= 0:
        marker = chr(0o200)
    else:
        marker = chr(0o377)
        # Two's complement; only the low digits-1 bytes are emitted.
        n += 256 ** digits

    payload = []
    for i in range(digits - 1):
        payload.append(chr(n & 0o377))
        n >>= 8
    payload.reverse()
    return marker + "".join(payload)
224
def uts(s, encoding, errors):
    """Encode the unicode object s with encoding and return the result.

       errors is handed to the codec as its error handler, except for
       the value "utf-8": similar to the -o invalid=UTF-8 option in
       POSIX.1-2001, every character that encoding cannot represent is
       then replaced with its UTF-8 representation.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # At least one character is untranslatable, so encode one by one.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
244
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the header
       block buf.

       Bytes 0-147 and 156-511 are summed up; the eight bytes of the
       chksum field in between are counted as if they were spaces
       (8 * 32 = 256). According to the GNU tar sources, some tars
       (Sun and NeXT) calculate the chksum with signed chars, which
       gives a different result if bytes with the high bit set are
       present. That is why both variants are calculated.
    """
    # "8x" skips the chksum field itself.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000257
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, everything that src delivers is copied.
       Raise IOError if src ends before length bytes were read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    # First all the chunks of BUFSIZE bytes ...
    done = 0
    while done < full_chunks:
        data = src.read(BUFSIZE)
        if len(data) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(data)
        done += 1

    # ... then whatever is left over.
    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
282
# Lookup table for filemode(). There is one group per character of the
# resulting string; a group is a sequence of (bits, character) pairs
# and the first pair whose bits are all set in the mode is used.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # others
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000309
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    # For every group of filemode_table take the character of the first
    # entry whose bits are all set in mode, or "-" if there is none.
    return "".join(
        next((char for bit, char in table if mode & bit == bit), "-")
        for table in filemode_table)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000324
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358
359#---------------------------
360# internal stream interface
361#---------------------------
362class _LowLevelFile:
363 """Low-level file object. Supports reading and writing.
364 It is used instead of a regular file object for streaming
365 access.
366 """
367
368 def __init__(self, name, mode):
369 mode = {
370 "r": os.O_RDONLY,
371 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
372 }[mode]
373 if hasattr(os, "O_BINARY"):
374 mode |= os.O_BINARY
Lars Gustäbel5c4c4612010-04-29 15:23:38 +0000375 self.fd = os.open(name, mode, 0666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376
377 def close(self):
378 os.close(self.fd)
379
380 def read(self, size):
381 return os.read(self.fd, size)
382
383 def write(self, s):
384 os.write(self.fd, s)
385
386class _Stream:
387 """Class that serves as an adapter between TarFile and
388 a stream-like object. The stream-like object only
389 needs to have a read() or write() method and is accessed
390 blockwise. Use of gzip or bzip2 compression is possible.
391 A stream-like object could be for example: sys.stdin,
392 sys.stdout, a socket, a tape device etc.
393
394 _Stream is intended to be used only internally.
395 """
396
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000397 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000398 """Construct a _Stream object.
399 """
400 self._extfileobj = True
401 if fileobj is None:
402 fileobj = _LowLevelFile(name, mode)
403 self._extfileobj = False
404
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000405 if comptype == '*':
406 # Enable transparent compression detection for the
407 # stream interface
408 fileobj = _StreamProxy(fileobj)
409 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 self.name = name or ""
412 self.mode = mode
413 self.comptype = comptype
414 self.fileobj = fileobj
415 self.bufsize = bufsize
416 self.buf = ""
417 self.pos = 0L
418 self.closed = False
419
Benjamin Peterson7fd59e02014-08-27 20:31:21 -0400420 try:
421 if comptype == "gz":
422 try:
423 import zlib
424 except ImportError:
425 raise CompressionError("zlib module is not available")
426 self.zlib = zlib
427 self.crc = zlib.crc32("") & 0xffffffffL
428 if mode == "r":
429 self._init_read_gz()
430 else:
431 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000432
Benjamin Peterson7fd59e02014-08-27 20:31:21 -0400433 elif comptype == "bz2":
434 try:
435 import bz2
436 except ImportError:
437 raise CompressionError("bz2 module is not available")
438 if mode == "r":
439 self.dbuf = ""
440 self.cmp = bz2.BZ2Decompressor()
441 else:
442 self.cmp = bz2.BZ2Compressor()
443 except:
444 if not self._extfileobj:
445 self.fileobj.close()
446 self.closed = True
447 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000448
449 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000450 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000451 self.close()
452
453 def _init_write_gz(self):
454 """Initialize for writing with gzip compression.
455 """
456 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
457 -self.zlib.MAX_WBITS,
458 self.zlib.DEF_MEM_LEVEL,
459 0)
460 timestamp = struct.pack("<L", long(time.time()))
461 self.__write("\037\213\010\010%s\002\377" % timestamp)
Lars Gustäbel7d4d0742011-12-21 19:27:50 +0100462 if type(self.name) is unicode:
463 self.name = self.name.encode("iso-8859-1", "replace")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000464 if self.name.endswith(".gz"):
465 self.name = self.name[:-3]
466 self.__write(self.name + NUL)
467
468 def write(self, s):
469 """Write string s to the stream.
470 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000471 if self.comptype == "gz":
Gregory P. Smith88440962008-03-25 06:12:45 +0000472 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000473 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000474 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000475 s = self.cmp.compress(s)
476 self.__write(s)
477
478 def __write(self, s):
479 """Write string s to the stream if a whole new block
480 is ready to be written.
481 """
482 self.buf += s
483 while len(self.buf) > self.bufsize:
484 self.fileobj.write(self.buf[:self.bufsize])
485 self.buf = self.buf[self.bufsize:]
486
487 def close(self):
488 """Close the _Stream object. No operation should be
489 done on it afterwards.
490 """
491 if self.closed:
492 return
493
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000494 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000495 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000496
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000497 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000498 self.fileobj.write(self.buf)
499 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000500 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000501 # The native zlib crc is an unsigned 32-bit integer, but
502 # the Python wrapper implicitly casts that to a signed C
503 # long. So, on a 32-bit box self.crc may "look negative",
504 # while the same crc on a 64-bit box may "look positive".
505 # To avoid irksome warnings from the `struct` module, force
506 # it to look positive on all boxes.
507 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000508 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000509
510 if not self._extfileobj:
511 self.fileobj.close()
512
513 self.closed = True
514
515 def _init_read_gz(self):
516 """Initialize for reading a gzip compressed fileobj.
517 """
518 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
519 self.dbuf = ""
520
521 # taken from gzip.GzipFile with some alterations
522 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000523 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000524 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000525 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000526
527 flag = ord(self.__read(1))
528 self.__read(6)
529
530 if flag & 4:
531 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
532 self.read(xlen)
533 if flag & 8:
534 while True:
535 s = self.__read(1)
536 if not s or s == NUL:
537 break
538 if flag & 16:
539 while True:
540 s = self.__read(1)
541 if not s or s == NUL:
542 break
543 if flag & 2:
544 self.__read(2)
545
546 def tell(self):
547 """Return the stream's file pointer position.
548 """
549 return self.pos
550
551 def seek(self, pos=0):
552 """Set the stream's file pointer to pos. Negative seeking
553 is forbidden.
554 """
555 if pos - self.pos >= 0:
556 blocks, remainder = divmod(pos - self.pos, self.bufsize)
557 for i in xrange(blocks):
558 self.read(self.bufsize)
559 self.read(remainder)
560 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000561 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000562 return self.pos
563
564 def read(self, size=None):
565 """Return the next size number of bytes from the stream.
566 If size is not defined, return all bytes of the stream
567 up to EOF.
568 """
569 if size is None:
570 t = []
571 while True:
572 buf = self._read(self.bufsize)
573 if not buf:
574 break
575 t.append(buf)
576 buf = "".join(t)
577 else:
578 buf = self._read(size)
579 self.pos += len(buf)
580 return buf
581
582 def _read(self, size):
583 """Return size bytes from the stream.
584 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000585 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000586 return self.__read(size)
587
588 c = len(self.dbuf)
589 t = [self.dbuf]
590 while c < size:
591 buf = self.__read(self.bufsize)
592 if not buf:
593 break
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000594 try:
595 buf = self.cmp.decompress(buf)
596 except IOError:
597 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000598 t.append(buf)
599 c += len(buf)
600 t = "".join(t)
601 self.dbuf = t[size:]
602 return t[:size]
603
604 def __read(self, size):
605 """Return size bytes from stream. If internal buffer is empty,
606 read another block from the stream.
607 """
608 c = len(self.buf)
609 t = [self.buf]
610 while c < size:
611 buf = self.fileobj.read(self.bufsize)
612 if not buf:
613 break
614 t.append(buf)
615 c += len(buf)
616 t = "".join(t)
617 self.buf = t[size:]
618 return t[:size]
619# class _Stream
620
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000621class _StreamProxy(object):
622 """Small proxy class that enables transparent compression
623 detection for the Stream interface (mode 'r|*').
624 """
625
626 def __init__(self, fileobj):
627 self.fileobj = fileobj
628 self.buf = self.fileobj.read(BLOCKSIZE)
629
630 def read(self, size):
631 self.read = self.fileobj.read
632 return self.buf
633
634 def getcomptype(self):
635 if self.buf.startswith("\037\213\010"):
636 return "gz"
Lars Gustäbel9a388632011-12-06 13:07:09 +0100637 if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000638 return "bz2"
639 return "tar"
640
641 def close(self):
642 self.fileobj.close()
643# class StreamProxy
644
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000645class _BZ2Proxy(object):
646 """Small proxy class that enables external file object
647 support for "r:bz2" and "w:bz2" modes. This is actually
648 a workaround for a limitation in bz2 module's BZ2File
649 class which (unlike gzip.GzipFile) has no support for
650 a file object argument.
651 """
652
653 blocksize = 16 * 1024
654
655 def __init__(self, fileobj, mode):
656 self.fileobj = fileobj
657 self.mode = mode
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000658 self.name = getattr(self.fileobj, "name", None)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000659 self.init()
660
661 def init(self):
662 import bz2
663 self.pos = 0
664 if self.mode == "r":
665 self.bz2obj = bz2.BZ2Decompressor()
666 self.fileobj.seek(0)
667 self.buf = ""
668 else:
669 self.bz2obj = bz2.BZ2Compressor()
670
671 def read(self, size):
672 b = [self.buf]
673 x = len(self.buf)
674 while x < size:
Lars Gustäbel2020a592009-03-22 20:09:33 +0000675 raw = self.fileobj.read(self.blocksize)
676 if not raw:
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000677 break
Lars Gustäbel2020a592009-03-22 20:09:33 +0000678 data = self.bz2obj.decompress(raw)
679 b.append(data)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000680 x += len(data)
681 self.buf = "".join(b)
682
683 buf = self.buf[:size]
684 self.buf = self.buf[size:]
685 self.pos += len(buf)
686 return buf
687
688 def seek(self, pos):
689 if pos < self.pos:
690 self.init()
691 self.read(pos - self.pos)
692
693 def tell(self):
694 return self.pos
695
696 def write(self, data):
697 self.pos += len(data)
698 raw = self.bz2obj.compress(data)
699 self.fileobj.write(raw)
700
701 def close(self):
702 if self.mode == "w":
703 raw = self.bz2obj.flush()
704 self.fileobj.write(raw)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000705# class _BZ2Proxy
706
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707#------------------------
708# Extraction file object
709#------------------------
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000710class _FileInFile(object):
711 """A thin wrapper around an existing file object that
712 provides a part of its data as an individual file
713 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000714 """
715
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000716 def __init__(self, fileobj, offset, size, sparse=None):
717 self.fileobj = fileobj
718 self.offset = offset
719 self.size = size
720 self.sparse = sparse
721 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000722
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000723 def tell(self):
724 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000725 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000726 return self.position
727
728 def seek(self, position):
729 """Seek to a position in the file.
730 """
731 self.position = position
732
733 def read(self, size=None):
734 """Read data from the file.
735 """
736 if size is None:
737 size = self.size - self.position
738 else:
739 size = min(size, self.size - self.position)
740
741 if self.sparse is None:
742 return self.readnormal(size)
743 else:
744 return self.readsparse(size)
745
746 def readnormal(self, size):
747 """Read operation for regular files.
748 """
749 self.fileobj.seek(self.offset + self.position)
750 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000751 return self.fileobj.read(size)
752
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000753 def readsparse(self, size):
754 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000755 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000756 data = []
757 while size > 0:
758 buf = self.readsparsesection(size)
759 if not buf:
760 break
761 size -= len(buf)
762 data.append(buf)
763 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000764
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000765 def readsparsesection(self, size):
766 """Read a single section of a sparse file.
767 """
768 section = self.sparse.find(self.position)
769
770 if section is None:
771 return ""
772
773 size = min(size, section.offset + section.size - self.position)
774
775 if isinstance(section, _data):
776 realpos = section.realpos + self.position - section.offset
777 self.fileobj.seek(self.offset + realpos)
778 self.position += size
779 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000780 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000781 self.position += size
782 return NUL * size
783#class _FileInFile
784
785
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() requests at a time.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """tarfile supplies the file object of the archive, tarinfo
           describes the member that is read.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read ahead but not handed out yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size must not reach the underlying file object,
        # where it would be taken for a number of bytes.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            # Use up the read-ahead data of readline() first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. A size of None or
           a negative size means no limit.

           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read blocks until one contains a newline or EOF is hit.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position is
           kept within 0 and the size of the member.

           Raise ValueError if the file is closed or if whence is not
           one of os.SEEK_SET, os.SEEK_CUR and os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # The read-ahead data does not belong to the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
913
914#------------------
915# Exported Classes
916#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath". The two properties below are plain
    # aliases for the name and linkname attributes.
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        # Class name, member name and the object's id in hex.
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 07777, the name of a directory
           member gets a trailing slash, and unicode values in the
           string fields are encoded using encoding and errors.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        for key in ("name", "linkname", "uname", "gname"):
            if type(info[key]) is unicode:
                info[key] = info[key].encode(encoding, errors)

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
           or PAX_FORMAT); any other value raises ValueError.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname exceeds LENGTH_LINK or if
           a long name cannot be split into prefix and name.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT)

    def create_gnu_header(self, info):
        """Return the object as a GNU header block sequence.

           Over-long link names and names are emitted first as separate
           GNU long-link / long-name members, followed by the regular
           header block.
        """
        info["magic"] = GNU_MAGIC

        buf = ""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)

        return buf + self._create_header(info, GNU_FORMAT)

    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that self.pax_headers is left untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                pax_headers[hname] = val
                continue

            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = unicode(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" found within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if no such
           slash exists or the remaining name part is longer than
           LENGTH_NAME.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        # The field widths below add up to 500 bytes; struct.pack()
        # pads the result with NULs up to BLOCKSIZE.
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            "        ", # checksum field (8 spaces while the sum is computed)
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", ""), 32),
            stn(info.get("gname", ""), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum placeholder with
        # six octal digits and a NUL; the 8th placeholder byte is kept.
        # (With a 512 byte block, -364 is offset 148, the same offset
        # frombuf() reads the checksum from.)
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The stored name is NUL terminated; the terminator counts
        # towards the size recorded in the header.
        name += NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be unicode objects.
        """
        records = []
        for keyword, value in pax_headers.iteritems():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the value no longer changes.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append("%d %s=%s\n" % (p, keyword, value))
        records = "".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises EmptyHeaderError for an empty buffer,
           TruncatedHeaderError if the buffer is not BLOCKSIZE long,
           EOFHeaderError if it consists of NULs only and
           InvalidHeaderError if the stored checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        # Keep the raw block; _proc_sparse() reads further fields from it.
        obj.buf = buf
        obj.name = nts(buf[0:100])
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257])
        obj.uname = nts(buf[265:297])
        obj.gname = nts(buf[297:329])
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500])

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf)
        # The header block has just been consumed, so it started one
        # block before the current file position.
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises SubsequentHeaderError if the header that must follow
           the long name data is missing or invalid.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

           The resulting list of hole and data sections is stored in
           self.sparse and self.size is replaced by the original file
           size recorded in the header.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        # Plain 0 instead of 0L: Python 2 promotes int to long on
        # demand, and the literal stays valid for newer parsers.
        lastpos = 0
        realpos = 0
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        # A trailing hole reaches up to the original file size.
        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the number of bytes stored in the
        # archive here; it is replaced by the original size afterwards.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.

           Raises InvalidHeaderError if a record declares a length of
           zero and SubsequentHeaderError if the header that must
           follow the pax data is missing or invalid.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would never advance pos and the loop
                # would match the same record forever.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.iteritems():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                # Values that cannot be converted fall back to 0.
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    # Type predicates: each one compares self.type with the matching
    # *TYPE constant(s).
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1478# class TarInfo
1479
1480class TarFile(object):
1481 """The TarFile Class provides an interface to tar archives.
1482 """
1483
1484 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1485
1486 dereference = False # If true, add content of linked file to the
1487 # tar file, else the link.
1488
1489 ignore_zeros = False # If true, skips empty or invalid blocks and
1490 # continues processing.
1491
Lars Gustäbel92ca7562009-12-13 11:32:27 +00001492 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493 # messages (if debug >= 0). If > 0, errors
1494 # are passed to the caller as exceptions.
1495
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001496 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001498 encoding = ENCODING # Encoding for 8-bit character strings.
1499
1500 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001501
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001502 tarinfo = TarInfo # The default TarInfo class to use.
1503
1504 fileobject = ExFileObject # The default ExFileObject class to use.
1505
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is one of
       'r' (read an existing archive, the default), 'a' (append to an
       existing file) or 'w' (create a new file, overwriting an
       existing one).
       If `fileobj' is given, it is used for reading or writing data
       and, if it has a mode attribute, that overrides the mode used
       for opening. `fileobj' is not closed when TarFile is closed.
       The remaining arguments, when not None, override the class
       attributes of the same name.
    """
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if fileobj:
        # Use the caller's file object and take over its name and mode
        # where available.
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes: only values that were actually passed replace
    # the class-level defaults.
    for attr, value in (("format", format), ("tarinfo", tarinfo),
                        ("dereference", dereference),
                        ("ignore_zeros", ignore_zeros),
                        ("encoding", encoding)):
        if value is not None:
            setattr(self, attr, value)

    if errors is not None:
        self.errors = errors
    else:
        self.errors = "utf-8" if mode == "r" else "strict"

    # Global pax headers are only kept for the pax format.
    use_pax = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax else {}

    for attr, value in (("debug", debug), ("errorlevel", errorlevel)):
        if value is not None:
            setattr(self, attr, value)

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                    self.members.append(member)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately catches everything: release a file we opened
        # ourselves, mark the object closed and re-raise unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001608
def _getposix(self):
    # True exactly when the archive format is USTAR_FORMAT.
    return self.format == USTAR_FORMAT
def _setposix(self, value):
    # Deprecated setter: warn (attributed to the caller's frame via
    # stacklevel 2), then map a true value to USTAR_FORMAT and a false
    # value to GNU_FORMAT.
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT
posix = property(_getposix, _setposix)
1620
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001621 #--------------------------------------------------------------------------
1622 # Below are the classmethods which act as alternate constructors to the
1623 # TarFile class. The open() method is the only one that is needed for
1624 # public use; it is the "super"-constructor and is able to select an
1625 # adequate "sub"-constructor for a particular compression using the mapping
1626 # from OPEN_METH.
1627 #
1628 # This concept allows one to subclass TarFile without losing the comfort of
1629 # the super-constructor. A sub-constructor is registered and made available
1630 # by adding it to the mapping in OPEN_METH.
1631
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or the
       mode cannot be interpreted, ReadError if no opener accepts the
       file in transparent mode, and CompressionError for an unknown
       compression type.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Transparent mode: try every registered *open() method until
        # one of them accepts the file.
        for comptype in cls.OPEN_METH:
            opener = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # Whatever went wrong, do not leave the stream open.
            stream.close()
            raise
        # The stream was created here, so it is not treated as an
        # external file object.
        t._extfileobj = False
        return t

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Georg Brandle4751e32006-05-18 06:11:19 +00001706 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001707
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading, appending
    or writing. ValueError is raised for any other mode.
    """
    if mode in ("r", "a", "w"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001715
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or
    writing. Appending is not allowed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    # gzip support is optional, so probe for it at call time.
    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        gzfile = gzip.GzipFile(name, mode, compresslevel, fileobj)
    except OSError:
        if fileobj is None or mode != 'r':
            raise
        raise ReadError("not a gzip file")

    try:
        archive = cls.taropen(name, mode, gzfile, **kwargs)
    except IOError:
        gzfile.close()
        if mode != 'r':
            raise
        raise ReadError("not a gzip file")
    except:
        # Whatever went wrong, do not leak the GzipFile object.
        gzfile.close()
        raise

    # The GzipFile wrapper was created here, so close() has to close it.
    archive._extfileobj = False
    return archive
1749
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or
    writing. Appending is not allowed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    # bz2 support is optional, so probe for it at call time.
    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # A caller-supplied file object is wrapped in a proxy, otherwise the
    # file is opened by name.
    if fileobj is None:
        bzfile = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        bzfile = _BZ2Proxy(fileobj, mode)

    try:
        archive = cls.taropen(name, mode, bzfile, **kwargs)
    except (IOError, EOFError):
        bzfile.close()
        if mode != 'r':
            raise
        raise ReadError("not a bzip2 file")
    except:
        # Whatever went wrong, do not leak the bzip2 file object.
        bzfile.close()
        raise

    # The bzip2 file object was created here, so close() has to close it.
    archive._extfileobj = False
    return archive
1780
# All *open() methods are registered here.
# Keys are the compression types that may follow ':' in a mode string;
# values are the names of the classmethods that open() looks up with
# getattr() and calls. With mode 'r' or 'r:*', open() tries every entry
# in turn.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1787
1788 #--------------------------------------------------------------------------
1789 # The public methods which TarFile provides:
1790
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
    appended to the archive and the archive is padded with zero blocks
    up to a multiple of RECORDSIZE.

    Calling close() on an already closed TarFile does nothing. If
    writing the trailing blocks fails, the exception is propagated, but
    the TarFile is marked as closed anyway and a file object that was
    opened by the TarFile itself is still closed, so that no file handle
    is leaked.
    """
    if self.closed:
        return

    # Mark the archive as closed first, so that a failing write below
    # cannot leave it half-open and writable.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # A file object passed in by the caller stays open; one that
        # was created internally is always released.
        if not self._extfileobj:
            self.fileobj.close()
1810
def getmember(self, name):
    """Return the TarInfo object of the member called `name'. KeyError
    is raised if there is no such member. The lookup is delegated to
    _getmember(), which searches the member list from the end, so if a
    name occurs more than once its last occurrence is returned.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001821
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in
    the same order as they appear in the archive.
    """
    self._check()
    # A complete list requires the whole archive to have been scanned.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1831
def getnames(self):
    """Return the names of the archive's members as a list, in the same
    order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001837
1838 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1839 """Create a TarInfo object for either the file `name' or the file
1840 object `fileobj' (using os.fstat on its file descriptor). You can
1841 modify some of the TarInfo's attributes before you add it using
1842 addfile(). If given, `arcname' specifies an alternative name for the
1843 file in the archive.
1844 """
1845 self._check("aw")
1846
1847 # When fileobj is given, replace name by
1848 # fileobj's real name.
1849 if fileobj is not None:
1850 name = fileobj.name
1851
1852 # Building the name of the member in the archive.
1853 # Backward slashes are converted to forward slashes,
1854 # Absolute paths are turned to relative paths.
1855 if arcname is None:
1856 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001857 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001858 arcname = arcname.replace(os.sep, "/")
1859 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001860
1861 # Now, fill the TarInfo object with
1862 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001863 tarinfo = self.tarinfo()
1864 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001865
1866 # Use os.stat or os.lstat, depending on platform
1867 # and if symlinks shall be resolved.
1868 if fileobj is None:
1869 if hasattr(os, "lstat") and not self.dereference:
1870 statres = os.lstat(name)
1871 else:
1872 statres = os.stat(name)
1873 else:
1874 statres = os.fstat(fileobj.fileno())
1875 linkname = ""
1876
1877 stmd = statres.st_mode
1878 if stat.S_ISREG(stmd):
1879 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001880 if not self.dereference and statres.st_nlink > 1 and \
1881 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001882 # Is it a hardlink to an already
1883 # archived file?
1884 type = LNKTYPE
1885 linkname = self.inodes[inode]
1886 else:
1887 # The inode is added only if its valid.
1888 # For win32 it is always 0.
1889 type = REGTYPE
1890 if inode[0]:
1891 self.inodes[inode] = arcname
1892 elif stat.S_ISDIR(stmd):
1893 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001894 elif stat.S_ISFIFO(stmd):
1895 type = FIFOTYPE
1896 elif stat.S_ISLNK(stmd):
1897 type = SYMTYPE
1898 linkname = os.readlink(name)
1899 elif stat.S_ISCHR(stmd):
1900 type = CHRTYPE
1901 elif stat.S_ISBLK(stmd):
1902 type = BLKTYPE
1903 else:
1904 return None
1905
1906 # Fill the TarInfo object with all
1907 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001908 tarinfo.name = arcname
1909 tarinfo.mode = stmd
1910 tarinfo.uid = statres.st_uid
1911 tarinfo.gid = statres.st_gid
Lars Gustäbel2ee9c6f2010-06-03 09:56:22 +00001912 if type == REGTYPE:
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001913 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001914 else:
1915 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001916 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001917 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001918 tarinfo.linkname = linkname
1919 if pwd:
1920 try:
1921 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1922 except KeyError:
1923 pass
1924 if grp:
1925 try:
1926 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1927 except KeyError:
1928 pass
1929
1930 if type in (CHRTYPE, BLKTYPE):
1931 if hasattr(os, "major") and hasattr(os, "minor"):
1932 tarinfo.devmajor = os.major(statres.st_rdev)
1933 tarinfo.devminor = os.minor(statres.st_rdev)
1934 return tarinfo
1935
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
    the names of the members are printed. If it is True, an `ls -l'-like
    output is produced.

    One line is printed per member. Iterating over the TarFile requires
    the archive to be open (checked by _check()).
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # Permission string, then owner/group (the symbolic name is
            # preferred, the numeric id is used if the name is empty).
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Device nodes show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Modification time in local time as YYYY-MM-DD hh:mm:ss.
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        # The member name is always printed; directories get a
        # trailing slash.
        print tarinfo.name + ("/" if tarinfo.isdir() else ""),

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the line (all prints above end with a comma).
        print
1964
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of
    file (directory, fifo, symbolic link, etc.). If given, `arcname'
    specifies an alternative name for the file in the archive.
    Directories are added recursively unless `recursive' is False.
    `exclude' (deprecated in favour of `filter') is a function that
    returns True for each filename to be excluded. `filter' is a
    function that takes a TarInfo object and returns the (possibly
    changed) TarInfo object, or None to exclude the member.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Deprecated way of excluding pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Describe the file; None means its type cannot be archived.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Give the filter a chance to change or drop the member.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are written together with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else consists of a header only; directories may
    # additionally be descended into.
    descend = tarinfo.isdir() and recursive
    self.addfile(tarinfo)
    if descend:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, exclude, filter)
2025
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
    given, tarinfo.size bytes are read from it and added to the archive,
    padded with NUL bytes up to a multiple of BLOCKSIZE. TarInfo objects
    can be created with gettarinfo().
    On Windows platforms, `fileobj' should always be opened with mode
    'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Append the data, if any, and fill up the last block.
    if fileobj is not None:
        size = member.size
        copyfileobj(fileobj, self.fileobj, size)
        padding = -size % BLOCKSIZE
        if padding:
            self.fileobj.write(NUL * padding)
        self.offset += size + padding

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002051
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
    directory, or to `path' if given. Owner, modification time and
    permissions of directories are set after everything else has been
    extracted. `members' is optional and must be a subset of the list
    returned by getmembers().
    """
    if members is None:
        members = self

    pending_dirs = []
    for member in members:
        if not member.isdir():
            self.extract(member, path)
            continue
        # Directories are created with a safe mode first; their real
        # attributes are applied below.
        pending_dirs.append(member)
        safe = copy.copy(member)
        safe.mode = 0o700
        self.extract(safe, path)

    # Walk the directories in reverse name order.
    by_name = sorted(pending_dirs, key=operator.attrgetter('name'))
    for member in reversed(by_name):
        dirpath = os.path.join(path, member.name)
        try:
            self.chown(member, dirpath)
            self.utime(member, dirpath)
            self.chmod(member, dirpath)
        except ExtractError as exc:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % exc)
2088
def extract(self, member, path=""):
    """Extract a member from the archive to the current working
    directory, or to `path' if given, using its full name. `member' may
    be a filename or a TarInfo object. Depending on self.errorlevel,
    errors are either raised or only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) else member

    # makelink() needs to know where a hard link's target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2121
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may
    be a filename or a TarInfo object. For a regular file (or a member
    of unknown type) a file-like object is returned. For a link, the
    file-like object of the link's target is returned. For everything
    else None is returned.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) else member

    # Members of unknown type are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # No data is associated with the member (directory, chrdev,
        # blkdev, etc.).
        return None

    # Resolving a link means looking up another member, which is refused
    # when the archive is read from a _Stream.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2159
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
    file called targetpath.
    """
    # Strip trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not part of the archive are created
    # with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    is_link = tarinfo.islnk() or tarinfo.issym()
    if is_link:
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member's type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    # Mode and modification time are not applied to symbolic links.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2201
2202 #--------------------------------------------------------------------------
2203 # Below are the different file methods. They are called via
2204 # _extract_member() when extract() is called. They can be replaced in a
2205 # subclass to implement other functionality.
2206
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath. An already existing
    targetpath is not treated as an error.
    """
    try:
        # The directory is created with a safe mode, the real mode is
        # set later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2217
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
    member tarinfo.
    """
    src = self.extractfile(tarinfo)
    try:
        with bltn_open(targetpath, "wb") as dst:
            copyfileobj(src, dst)
    finally:
        # The member's file object is closed even if copying fails.
        src.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002227
def makeunknown(self, tarinfo, targetpath):
    """Make a file at targetpath from a TarInfo object with an unknown
    type. The member is extracted like a regular file and a debug
    message is emitted.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
2235
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised if the
    platform has no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002243
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
    ExtractError is raised if the platform lacks os.mknod() or
    os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2258
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
    (platform limitation), we try to make a copy of the referenced file
    instead of a link.
    """
    if not (hasattr(os, "symlink") and hasattr(os, "link")):
        # No link support: extract the member the link refers to.
        try:
            self._extract_member(self._find_link_target(tarinfo), targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
        return

    # For systems that support symbolic and hard links.
    if tarinfo.issym():
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.symlink(tarinfo.linkname, targetpath)
    elif os.path.exists(tarinfo._link_target):
        # _link_target has been prepared by extract().
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.link(tarinfo._link_target, targetpath)
    else:
        # The hard link's target is not on disk, extract the member the
        # link refers to in its place.
        self._extract_member(self._find_link_target(tarinfo), targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002283
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is done
    unless the effective user id is 0. ExtractError is raised if the
    owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Prefer the ids of the symbolic names; fall back to the numeric
    # ids stored in the archive if the names are not known here.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        gid = tarinfo.gid
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        uid = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002305
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
    ExtractError is raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002314
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo. The
    access time is set to the same value. ExtractError is raised if the
    time cannot be changed.
    """
    if hasattr(os, 'utime'):
        stamp = (tarinfo.mtime, tarinfo.mtime)
        try:
            os.utime(targetpath, stamp)
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002324
2325 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    Every member that is read is appended to self.members. When no
    further member can be read, self._loaded is set to True. ReadError
    is raised for a header problem at offset 0 and for any
    SubsequentHeaderError.
    """
    self._check("ra")
    # A member stored in self.firstmember is handed out once, before
    # anything is read from the file object.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError, e:
            # With ignore_zeros the block is skipped and the next one is
            # tried; otherwise this ends the loop with tarinfo == None.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError, e:
            # Skipped with ignore_zeros, an error at the very start of
            # the file, otherwise treated as the end of the archive.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            # Only an error at offset 0; later on it ends the archive.
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError, e:
            # Only an error at offset 0; later on it ends the archive.
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError, e:
            # Always an error, regardless of the offset.
            raise ReadError(str(e))
        # Reached on success and by every handler that neither
        # continued nor raised.
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more to read: the member list is complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002371
2372 #--------------------------------------------------------------------------
2373 # Little helper methods:
2374
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last archive member whose name equals name, or
       None if no member matches.

       The member list is scanned from its end towards its beginning.
       If tarinfo is given, only the members stored in front of it
       are considered. If normalize is true, name and every member
       name are passed through os.path.normpath() before they are
       compared.
    """
    # getmembers() takes care that all members have been loaded.
    candidates = self.getmembers()

    # Cut the search list off right in front of tarinfo.
    if tarinfo is not None:
        stop = candidates.index(tarinfo)
        candidates = candidates[:stop]

    if normalize:
        wanted = os.path.normpath(name)
    else:
        wanted = name

    for candidate in reversed(candidates):
        current = candidate.name
        if normalize:
            current = os.path.normpath(current)
        if wanted == current:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002397
def _load(self):
    """Walk through the whole archive by calling next() until it
       returns None, then mark the archive as completely loaded.
    """
    # next() does the actual reading; its results are not needed here.
    while self.next() is not None:
        pass
    self._loaded = True
2407
def _check(self, mode=None):
    """Raise IOError if the object is closed or, when mode is given,
       if the object's own mode is not contained in mode.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        # No particular mode requested, being open is enough.
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002416
def _find_link_target(self, tarinfo):
    """Return the member that the symlink or hardlink member tarinfo
       refers to. Raise KeyError if no such member is found.
    """
    if not tarinfo.issym():
        # A hard link is just a reference to an already archived
        # file, so only the members in front of the link are searched.
        target = tarinfo.linkname
        boundary = tarinfo
    else:
        # The link name is joined to the directory part of the
        # symlink's own name; empty parts are left out. The entire
        # archive is searched.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        target = "/".join([piece for piece in pieces if piece])
        boundary = None

    found = self._getmember(target, tarinfo=boundary, normalize=True)
    if found is None:
        raise KeyError("linkname %r not found" % target)
    return found
2435
def __iter__(self):
    """Return an iterator over the archive members: a TarIter as long
       as the archive is not completely loaded, otherwise a plain
       iterator over the members list.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2443
def _dbg(self, level, msg):
    """Print msg to sys.stderr if level does not exceed self.debug.
    """
    if not level <= self.debug:
        # Message is more verbose than the configured debug level.
        return
    print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002449
def __enter__(self):
    """Enter the runtime context: run _check() and hand back the
       object itself as the context value.
    """
    self._check()
    return self
2453
def __exit__(self, type, value, traceback):
    """Leave the runtime context. Without an exception the object is
       closed regularly. With an exception only the file object is
       closed (unless _extfileobj is set) and the closed flag is set.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002463# class TarFile
2464
class TarIter:
    """Iterator over the members of a TarFile object.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile that starts at the first
           member.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member. Members that are already stored in
           the TarFile's members list are taken from there, others are
           read with TarFile's next() method. When there is nothing
           left to read, the TarFile is marked as _loaded and
           StopIteration is raised.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            entry = archive.next()
        elif self.index < len(archive.members):
            entry = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            entry = archive.next()
            if not entry:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return entry
2502
2503# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range that starts at offset
       and is size units long.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # True if offset lies in the half-open range
        # [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
2512
class _data(_section):
    """A data section of a sparse file. In addition to offset and
       size it stores realpos.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2519
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section
       and only serves to tell holes apart from data sections.
    """
2524
class _ringbuffer(list):
    """List subclass whose find() starts each search at the position
       of the previous hit and wraps around at the end of the list,
       which increases performance over a regular list when lookups
       are mostly sequential.
    """
    def __init__(self):
        # Index of the item that the last successful find() returned.
        self.idx = 0

    def find(self, offset):
        """Return the first item, starting at the last hit, for which
           `offset in item' is true, or None if no item contains
           offset. A successful search moves the start position to the
           returned item, an unsuccessful one leaves it untouched.
        """
        if not self:
            # An empty buffer cannot contain any offset. Report that
            # like any other miss instead of failing with IndexError.
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                # Wrap around to the beginning of the list.
                idx = 0
            if idx == self.idx:
                # Back at the start position: every item has been
                # tried (End of File).
                return None
        self.idx = idx
        return item
2545
2546#---------------------------------------------
2547# zipfile compatible TarFile class
2548#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around a TarFile object that offers the method names
       of standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED). Any other compression value
           raises ValueError. In a read mode every member is given the
           additional attributes filename, file_size and date_time.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            # Attach the attribute names that ZipInfo objects have.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of all members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES.
        """
        def is_regular(member):
            return member.type in REGULAR_TYPES
        return filter(is_regular, self.tarfile.getmembers())

    def printdir(self):
        """Call list() on the wrapped TarFile.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete data of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname. compress_type is
           accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes as a member that takes its name and
           modification time from zinfo.filename and zinfo.date_time.
        """
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))

    def close(self):
        """Close the wrapped TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2599
2600#--------------------
2601# exported functions
2602#--------------------
def is_tarfile(name):
    """Return True if name can be opened with this module's open()
       and closed again without a TarError, else return False.
    """
    try:
        # Opening is the whole test, the archive is closed right away.
        open(name).close()
    except TarError:
        return False
    return True
2613
# Save the current binding of open under another name, because the next
# statement rebinds the module-level name.
# NOTE(review): presumably this is still the builtin open() at this
# point -- confirm that nothing earlier in the module rebinds it.
bltn_open = open
# From here on the module-level open() is TarFile.open.
open = TarFile.open