blob: 57ea877911b38df8c3f1d11088b82bf7675f9ebc [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001# -*- coding: iso-8859-1 -*-
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Senthil Kumaran4af1c6a2011-07-28 22:30:27 +080032__version__ = "$Revision: 85213 $"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000033# $Source$
34
Lars Gustäbelc64e4022007-03-13 10:47:19 +000035version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000036__author__ = "Lars Gustäbel (lars@gustaebel.de)"
37__date__ = "$Date$"
38__cvsid__ = "$Id$"
39__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
40
41#---------
42# Imports
43#---------
44import sys
45import os
46import shutil
47import stat
48import errno
49import time
50import struct
Georg Brandl3354f282006-10-29 09:16:12 +000051import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000052import re
Brett Cannon132fc542008-08-04 21:23:07 +000053import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
55try:
56 import grp, pwd
57except ImportError:
58 grp = pwd = None
59
60# from tarfile import *
61__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
62
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic strings are exactly 8 characters long: they cover the
# magic field and the version field of a header block.  The GNU
# variant is "ustar" followed by TWO spaces and a null character.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# One-character member type flags.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers for the archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Each value is the callable used to
# convert the field.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
131
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Read/write/execute permission bits for owner, group and other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
155
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Module-wide default encoding: the filesystem encoding when the
# interpreter reports one, otherwise the interpreter's default
# string encoding.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000162
163#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164# Some useful functions
165#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000166
def stn(s, length):
    """Return s as a field of exactly length characters.

       Input longer than length is truncated; shorter input is padded
       on the right with null characters.
    """
    padding = "\0" * (length - len(s))
    return s[:length] + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000171
def nts(s):
    """Return the part of a string field that precedes the first
       null character, or the whole field if it contains none.
    """
    text, _sep, _rest = s.partition("\0")
    return text
180
Georg Brandl38c6a222006-05-10 16:26:03 +0000181def nti(s):
182 """Convert a number field to a python number.
183 """
184 # There are two possible encodings for a number field, see
185 # itn() below.
186 if s[0] != chr(0200):
Georg Brandlded1c4d2006-12-20 11:55:16 +0000187 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000188 n = int(nts(s) or "0", 8)
Georg Brandlded1c4d2006-12-20 11:55:16 +0000189 except ValueError:
Lars Gustäbeldd866d52009-11-22 18:30:53 +0000190 raise InvalidHeaderError("invalid header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000191 else:
192 n = 0L
193 for i in xrange(len(s) - 1):
194 n <<= 8
195 n += ord(s[i + 1])
196 return n
197
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000198def itn(n, digits=8, format=DEFAULT_FORMAT):
Georg Brandl38c6a222006-05-10 16:26:03 +0000199 """Convert a python number to a number field.
200 """
201 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
202 # octal digits followed by a null-byte, this allows values up to
203 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
204 # that if necessary. A leading 0200 byte indicates this particular
205 # encoding, the following digits-1 bytes are a big-endian
206 # representation. This allows values up to (256**(digits-1))-1.
207 if 0 <= n < 8 ** (digits - 1):
208 s = "%0*o" % (digits - 1, n) + NUL
209 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000210 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
Georg Brandle4751e32006-05-18 06:11:19 +0000211 raise ValueError("overflow in number field")
Georg Brandl38c6a222006-05-10 16:26:03 +0000212
213 if n < 0:
214 # XXX We mimic GNU tar's behaviour with negative numbers,
215 # this could raise OverflowError.
216 n = struct.unpack("L", struct.pack("l", n))[0]
217
218 s = ""
219 for i in xrange(digits - 1):
220 s = chr(n & 0377) + s
221 n >>= 8
222 s = chr(0200) + s
223 return s
224
def uts(s, encoding, errors):
    """Encode the unicode object s with encoding and return a string.

       errors is passed on to the codec, except for the special value
       "utf-8": that one is an extra error handler similar to the
       -o invalid=UTF-8 option in POSIX.1-2001, which replaces every
       character that encoding cannot represent with its UTF-8
       representation.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # Fast path: the whole string is representable.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # Slow path: encode character by character with a UTF-8 fallback.
    encoded = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        encoded.append(piece)
    return "".join(encoded)
244
def calc_chksums(buf):
    """Return the (unsigned, signed) checksum pair for a header block.

       All characters of the first 512 are summed up, except for the
       8 characters of the chksum field (offsets 148 to 155), which
       are counted as if they were spaces (8 * 32 = 256).  According
       to the GNU tar sources, some tars (Sun and NeXT) calculate
       chksum with signed char, which gives a different result if
       there are chars in the buffer with the high bit set.  That is
       why both variants are calculated.
    """
    header = buf[:512]
    # "8x" skips over the chksum field itself.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000257
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError("end of file reached") if src runs out of data
       before length bytes have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    # Copy the complete chunks first ...
    copied = 0
    while copied < full_chunks:
        data = src.read(chunk_size)
        if len(data) < chunk_size:
            raise IOError("end of file reached")
        dst.write(data)
        copied += 1

    # ... then whatever is left over.
    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
282
# Lookup table used by filemode().  It contains one inner tuple per
# character of the resulting string.  Each inner tuple lists
# (bits, character) pairs; filemode() uses the first pair whose bits
# are all set in the mode, so combined bit patterns have to be listed
# before the single bits they contain.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000309
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       Every entry of filemode_table yields one character: the one
       belonging to the first bit pattern that is completely set in
       mode, or "-" if none of the patterns matches.
    """
    chars = []
    for group in filemode_table:
        matches = (symbol for bits, symbol in group if mode & bits == bits)
        chars.append(next(matches, "-"))
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000324
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358
359#---------------------------
360# internal stream interface
361#---------------------------
362class _LowLevelFile:
363 """Low-level file object. Supports reading and writing.
364 It is used instead of a regular file object for streaming
365 access.
366 """
367
368 def __init__(self, name, mode):
369 mode = {
370 "r": os.O_RDONLY,
371 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
372 }[mode]
373 if hasattr(os, "O_BINARY"):
374 mode |= os.O_BINARY
Lars Gustäbel5c4c4612010-04-29 15:23:38 +0000375 self.fd = os.open(name, mode, 0666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376
377 def close(self):
378 os.close(self.fd)
379
380 def read(self, size):
381 return os.read(self.fd, size)
382
383 def write(self, s):
384 os.write(self.fd, s)
385
386class _Stream:
387 """Class that serves as an adapter between TarFile and
388 a stream-like object. The stream-like object only
389 needs to have a read() or write() method and is accessed
390 blockwise. Use of gzip or bzip2 compression is possible.
391 A stream-like object could be for example: sys.stdin,
392 sys.stdout, a socket, a tape device etc.
393
394 _Stream is intended to be used only internally.
395 """
396
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000397 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000398 """Construct a _Stream object.
399 """
400 self._extfileobj = True
401 if fileobj is None:
402 fileobj = _LowLevelFile(name, mode)
403 self._extfileobj = False
404
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000405 if comptype == '*':
406 # Enable transparent compression detection for the
407 # stream interface
408 fileobj = _StreamProxy(fileobj)
409 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 self.name = name or ""
412 self.mode = mode
413 self.comptype = comptype
414 self.fileobj = fileobj
415 self.bufsize = bufsize
416 self.buf = ""
417 self.pos = 0L
418 self.closed = False
419
420 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000421 try:
422 import zlib
423 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000424 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000425 self.zlib = zlib
Gregory P. Smith88440962008-03-25 06:12:45 +0000426 self.crc = zlib.crc32("") & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000427 if mode == "r":
428 self._init_read_gz()
429 else:
430 self._init_write_gz()
431
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000432 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000433 try:
434 import bz2
435 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000436 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000437 if mode == "r":
438 self.dbuf = ""
439 self.cmp = bz2.BZ2Decompressor()
440 else:
441 self.cmp = bz2.BZ2Compressor()
442
443 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000444 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000445 self.close()
446
447 def _init_write_gz(self):
448 """Initialize for writing with gzip compression.
449 """
450 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
451 -self.zlib.MAX_WBITS,
452 self.zlib.DEF_MEM_LEVEL,
453 0)
454 timestamp = struct.pack("<L", long(time.time()))
455 self.__write("\037\213\010\010%s\002\377" % timestamp)
Lars Gustäbel7d4d0742011-12-21 19:27:50 +0100456 if type(self.name) is unicode:
457 self.name = self.name.encode("iso-8859-1", "replace")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000458 if self.name.endswith(".gz"):
459 self.name = self.name[:-3]
460 self.__write(self.name + NUL)
461
462 def write(self, s):
463 """Write string s to the stream.
464 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000465 if self.comptype == "gz":
Gregory P. Smith88440962008-03-25 06:12:45 +0000466 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000467 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000468 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000469 s = self.cmp.compress(s)
470 self.__write(s)
471
472 def __write(self, s):
473 """Write string s to the stream if a whole new block
474 is ready to be written.
475 """
476 self.buf += s
477 while len(self.buf) > self.bufsize:
478 self.fileobj.write(self.buf[:self.bufsize])
479 self.buf = self.buf[self.bufsize:]
480
481 def close(self):
482 """Close the _Stream object. No operation should be
483 done on it afterwards.
484 """
485 if self.closed:
486 return
487
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000488 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000489 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000490
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000491 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000492 self.fileobj.write(self.buf)
493 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000494 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000495 # The native zlib crc is an unsigned 32-bit integer, but
496 # the Python wrapper implicitly casts that to a signed C
497 # long. So, on a 32-bit box self.crc may "look negative",
498 # while the same crc on a 64-bit box may "look positive".
499 # To avoid irksome warnings from the `struct` module, force
500 # it to look positive on all boxes.
501 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000502 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503
504 if not self._extfileobj:
505 self.fileobj.close()
506
507 self.closed = True
508
509 def _init_read_gz(self):
510 """Initialize for reading a gzip compressed fileobj.
511 """
512 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
513 self.dbuf = ""
514
515 # taken from gzip.GzipFile with some alterations
516 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000517 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000518 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000519 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000520
521 flag = ord(self.__read(1))
522 self.__read(6)
523
524 if flag & 4:
525 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
526 self.read(xlen)
527 if flag & 8:
528 while True:
529 s = self.__read(1)
530 if not s or s == NUL:
531 break
532 if flag & 16:
533 while True:
534 s = self.__read(1)
535 if not s or s == NUL:
536 break
537 if flag & 2:
538 self.__read(2)
539
540 def tell(self):
541 """Return the stream's file pointer position.
542 """
543 return self.pos
544
545 def seek(self, pos=0):
546 """Set the stream's file pointer to pos. Negative seeking
547 is forbidden.
548 """
549 if pos - self.pos >= 0:
550 blocks, remainder = divmod(pos - self.pos, self.bufsize)
551 for i in xrange(blocks):
552 self.read(self.bufsize)
553 self.read(remainder)
554 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000555 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000556 return self.pos
557
558 def read(self, size=None):
559 """Return the next size number of bytes from the stream.
560 If size is not defined, return all bytes of the stream
561 up to EOF.
562 """
563 if size is None:
564 t = []
565 while True:
566 buf = self._read(self.bufsize)
567 if not buf:
568 break
569 t.append(buf)
570 buf = "".join(t)
571 else:
572 buf = self._read(size)
573 self.pos += len(buf)
574 return buf
575
576 def _read(self, size):
577 """Return size bytes from the stream.
578 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000579 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000580 return self.__read(size)
581
582 c = len(self.dbuf)
583 t = [self.dbuf]
584 while c < size:
585 buf = self.__read(self.bufsize)
586 if not buf:
587 break
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000588 try:
589 buf = self.cmp.decompress(buf)
590 except IOError:
591 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000592 t.append(buf)
593 c += len(buf)
594 t = "".join(t)
595 self.dbuf = t[size:]
596 return t[:size]
597
598 def __read(self, size):
599 """Return size bytes from stream. If internal buffer is empty,
600 read another block from the stream.
601 """
602 c = len(self.buf)
603 t = [self.buf]
604 while c < size:
605 buf = self.fileobj.read(self.bufsize)
606 if not buf:
607 break
608 t.append(buf)
609 c += len(buf)
610 t = "".join(t)
611 self.buf = t[size:]
612 return t[:size]
613# class _Stream
614
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000615class _StreamProxy(object):
616 """Small proxy class that enables transparent compression
617 detection for the Stream interface (mode 'r|*').
618 """
619
620 def __init__(self, fileobj):
621 self.fileobj = fileobj
622 self.buf = self.fileobj.read(BLOCKSIZE)
623
624 def read(self, size):
625 self.read = self.fileobj.read
626 return self.buf
627
628 def getcomptype(self):
629 if self.buf.startswith("\037\213\010"):
630 return "gz"
Lars Gustäbel9a388632011-12-06 13:07:09 +0100631 if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000632 return "bz2"
633 return "tar"
634
635 def close(self):
636 self.fileobj.close()
637# class StreamProxy
638
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000639class _BZ2Proxy(object):
640 """Small proxy class that enables external file object
641 support for "r:bz2" and "w:bz2" modes. This is actually
642 a workaround for a limitation in bz2 module's BZ2File
643 class which (unlike gzip.GzipFile) has no support for
644 a file object argument.
645 """
646
647 blocksize = 16 * 1024
648
649 def __init__(self, fileobj, mode):
650 self.fileobj = fileobj
651 self.mode = mode
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000652 self.name = getattr(self.fileobj, "name", None)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000653 self.init()
654
655 def init(self):
656 import bz2
657 self.pos = 0
658 if self.mode == "r":
659 self.bz2obj = bz2.BZ2Decompressor()
660 self.fileobj.seek(0)
661 self.buf = ""
662 else:
663 self.bz2obj = bz2.BZ2Compressor()
664
665 def read(self, size):
666 b = [self.buf]
667 x = len(self.buf)
668 while x < size:
Lars Gustäbel2020a592009-03-22 20:09:33 +0000669 raw = self.fileobj.read(self.blocksize)
670 if not raw:
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000671 break
Lars Gustäbel2020a592009-03-22 20:09:33 +0000672 data = self.bz2obj.decompress(raw)
673 b.append(data)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000674 x += len(data)
675 self.buf = "".join(b)
676
677 buf = self.buf[:size]
678 self.buf = self.buf[size:]
679 self.pos += len(buf)
680 return buf
681
682 def seek(self, pos):
683 if pos < self.pos:
684 self.init()
685 self.read(pos - self.pos)
686
687 def tell(self):
688 return self.pos
689
690 def write(self, data):
691 self.pos += len(data)
692 raw = self.bz2obj.compress(data)
693 self.fileobj.write(raw)
694
695 def close(self):
696 if self.mode == "w":
697 raw = self.bz2obj.flush()
698 self.fileobj.write(raw)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000699# class _BZ2Proxy
700
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000701#------------------------
702# Extraction file object
703#------------------------
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000704class _FileInFile(object):
705 """A thin wrapper around an existing file object that
706 provides a part of its data as an individual file
707 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708 """
709
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000710 def __init__(self, fileobj, offset, size, sparse=None):
711 self.fileobj = fileobj
712 self.offset = offset
713 self.size = size
714 self.sparse = sparse
715 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000716
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000717 def tell(self):
718 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000719 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000720 return self.position
721
722 def seek(self, position):
723 """Seek to a position in the file.
724 """
725 self.position = position
726
727 def read(self, size=None):
728 """Read data from the file.
729 """
730 if size is None:
731 size = self.size - self.position
732 else:
733 size = min(size, self.size - self.position)
734
735 if self.sparse is None:
736 return self.readnormal(size)
737 else:
738 return self.readsparse(size)
739
740 def readnormal(self, size):
741 """Read operation for regular files.
742 """
743 self.fileobj.seek(self.offset + self.position)
744 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000745 return self.fileobj.read(size)
746
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000747 def readsparse(self, size):
748 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000749 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000750 data = []
751 while size > 0:
752 buf = self.readsparsesection(size)
753 if not buf:
754 break
755 size -= len(buf)
756 data.append(buf)
757 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000758
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000759 def readsparsesection(self, size):
760 """Read a single section of a sparse file.
761 """
762 section = self.sparse.find(self.position)
763
764 if section is None:
765 return ""
766
767 size = min(size, section.offset + section.size - self.position)
768
769 if isinstance(section, _data):
770 realpos = section.realpos + self.position - section.offset
771 self.fileobj.seek(self.offset + realpos)
772 self.position += size
773 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000774 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000775 self.position += size
776 return NUL * size
777#class _FileInFile
778
779
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() requests at a time.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member described by tarinfo,
           whose data is read from tarfile.fileobj.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Data left over from readline() comes first.
        pending = self.buffer
        if not pending:
            head = ""
        elif size is None:
            head, self.buffer = pending, ""
        else:
            head, self.buffer = pending[:size], pending[size:]

        if size is None:
            tail = self.fileobj.read()
        else:
            tail = self.fileobj.read(size - len(head))

        data = head + tail
        self.position += len(data)
        return data

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        newline_at = self.buffer.find("\n")
        if newline_at >= 0:
            end = newline_at + 1
        else:
            # Read blocks until one contains a newline or the
            # data is exhausted.
            pieces = [self.buffer]
            while True:
                block = self.fileobj.read(self.blocksize)
                pieces.append(block)
                if not block or "\n" in block:
                    break
            self.buffer = "".join(pieces)
            end = self.buffer.find("\n") + 1
            if end == 0:
                # no newline found.
                end = len(self.buffer)

        if size != -1:
            end = min(size, end)

        line = self.buffer[:end]
        self.buffer = self.buffer[end:]
        self.position += len(line)
        return line

    def readlines(self):
        """Return a list with all remaining lines.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           The resulting position is kept between 0 and the size of
           the file.  Raises ValueError for an unknown whence.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            moved = self.position + pos
            target = max(moved, 0) if pos < 0 else min(moved, self.size)
        elif whence == os.SEEK_END:
            target = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.position = target
        # Buffered data does not belong to the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        line = self.readline()
        while line:
            yield line
            line = self.readline()
#class ExFileObject
907
908#------------------
909# Exported Classes
910#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.name, id(self))

    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.

           Directory names get a trailing slash, and unicode values of
           the string fields are encoded with encoding/errors.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        for key in ("name", "linkname", "uname", "gname"):
            # isinstance() so that unicode subclasses are encoded as well.
            if isinstance(info[key], unicode):
                info[key] = info[key].encode(encoding, errors)

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
           or PAX_FORMAT); any other value raises ValueError.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info):
        """Return the object as a ustar header block.

           Raises ValueError if the link name does not fit, or if the
           name cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT)

    def create_gnu_header(self, info):
        """Return the object as a GNU header block sequence.

           Over-long link names and names are emitted as separate
           GNU long-link / long-name members in front of the header.
        """
        info["magic"] = GNU_MAGIC

        buf = ""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)

        return buf + self._create_header(info, GNU_FORMAT)

    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                pax_headers[hname] = val
                continue

            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = unicode(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if no such
           split exists or the remaining name is still too long.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field: an 8 byte placeholder of spaces. frombuf()
            # reads the checksum back from buf[148:156], so the width
            # must be exactly 8.
            " " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", ""), 32),
            stn(info.get("gname", ""), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the placeholder with the octal
        # checksum plus a NUL; the last placeholder space is kept.
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name += NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be unicode objects.
        """
        records = []
        for keyword, value in pax_headers.iteritems():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the value stops changing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append("%d %s=%s\n" % (p, keyword, value))
        records = "".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError
           or InvalidHeaderError if buf is not a usable header block.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.buf = buf
        obj.name = nts(buf[0:100])
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257])
        obj.uname = nts(buf[265:297])
        obj.gname = nts(buf[297:329])
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500])

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            member = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        member.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            member.name = nts(buf)
        elif self.type == GNUTYPE_LONGLINK:
            member.linkname = nts(buf)

        return member

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        lastpos = 0
        realpos = 0
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # The header's size field is used to locate the next member
        # before it is replaced by the size read from buf[483:495].
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.

           Raises InvalidHeaderError for a record that declares a
           length of zero and SubsequentHeaderError if the header that
           follows cannot be read.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would leave pos unchanged and match the
                # same record forever.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            member = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            member.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = member.offset_data
                if member.isreg() or member.type not in SUPPORTED_TYPES:
                    offset += member._block(member.size)
                tarfile.offset = offset

        return member

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.iteritems():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    # Unparsable numbers fall back to 0.
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    # Type predicates: each compares self.type with the matching
    # *TYPE constant(s).
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1472# class TarInfo
1473
1474class TarFile(object):
1475 """The TarFile Class provides an interface to tar archives.
1476 """
1477
1478 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1479
1480 dereference = False # If true, add content of linked file to the
1481 # tar file, else the link.
1482
1483 ignore_zeros = False # If true, skips empty or invalid blocks and
1484 # continues processing.
1485
Lars Gustäbel92ca7562009-12-13 11:32:27 +00001486 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001487 # messages (if debug >= 0). If > 0, errors
1488 # are passed to the caller as exceptions.
1489
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001490 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001491
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001492 encoding = ENCODING # Encoding for 8-bit character strings.
1493
1494 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001495
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001496 tarinfo = TarInfo # The default TarInfo class to use.
1497
1498 fileobject = ExFileObject # The default ExFileObject class to use.
1499
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining keyword arguments override the class-level
       defaults of the same name when they are not None.
    """
    binary_modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in binary_modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = binary_modes[mode]

    if fileobj:
        # Work on the caller's file object and borrow its name and
        # mode where it exposes them.
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode, self._mode = "w", "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes: only explicitly given values replace the
    # class defaults.
    for attr, given in (("format", format), ("tarinfo", tarinfo),
                        ("dereference", dereference),
                        ("ignore_zeros", ignore_zeros),
                        ("encoding", encoding)):
        if given is not None:
            setattr(self, attr, given)

    if errors is None:
        errors = "utf-8" if mode == "r" else "strict"
    self.errors = errors

    keep_given = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if keep_given else {}

    for attr, given in (("debug", debug), ("errorlevel", errorlevel)):
        if given is not None:
            setattr(self, attr, given)

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                    self.members.append(member)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(
                        self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except:
        # Whatever went wrong, do not leak a file we opened ourselves,
        # then let the original exception propagate.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001602
def _getposix(self):
    """Tell whether the archive format is USTAR_FORMAT.
    """
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Deprecated setter: a true value selects USTAR_FORMAT, a false
       one GNU_FORMAT. Always emits a DeprecationWarning.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT

posix = property(_getposix, _setposix)
1614
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001615 #--------------------------------------------------------------------------
1616 # Below are the classmethods which act as alternate constructors to the
1617 # TarFile class. The open() method is the only one that is needed for
1618 # public use; it is the "super"-constructor and is able to select an
1619 # adequate "sub"-constructor for a particular compression using the mapping
1620 # from OPEN_METH.
1621 #
1622 # This concept allows one to subclass TarFile without losing the comfort of
1623 # the super-constructor. A sub-constructor is registered and made available
1624 # by adding it to the mapping in OPEN_METH.
1625
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError for a missing target or an unusable mode,
       CompressionError for an unknown compression type and ReadError
       if no registered opener can read the file.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Try every registered opener in turn; the first one that
        # succeeds wins.
        for comptype in cls.OPEN_METH:
            opener = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind a caller-supplied file before the next attempt.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        archive = cls(name, filemode, stream, **kwargs)
        # The stream wrapper was created here, so the archive is
        # flagged as not working on an external file object.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001698
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive name for reading, appending
       or writing. ValueError is raised for any other mode.
    """
    if mode in ("r", "a", "w"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001706
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       ValueError is raised for modes other than 'r' and 'w',
       CompressionError if the gzip module is not usable and ReadError
       if the archive cannot be read as gzip data in mode 'r'.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the underlying file was opened here: GzipFile
    # does not take ownership of a file object that is passed in, so a
    # file opened by this method has to be released by this method
    # when the archive turns out to be unusable.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        try:
            t = cls.taropen(name, mode,
                            gzip.GzipFile(name, mode, compresslevel, fileobj),
                            **kwargs)
        except IOError:
            if mode == 'r':
                raise ReadError("not a gzip file")
            raise
    except BaseException:
        # Never close a caller-supplied file object; open() rewinds it
        # and hands it to the next *open() candidate.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1734
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       ValueError is raised for modes other than 'r' and 'w',
       CompressionError if the bz2 module cannot be imported and
       ReadError if the archive cannot be read as bzip2 data in mode 'r'.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # A BZ2File created here is owned by this method until the TarFile
    # has been constructed successfully; a caller-supplied file object
    # is only wrapped and must never be closed on failure.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            if mode == 'r':
                raise ReadError("not a bzip2 file")
            raise
    except BaseException:
        # Release the file handle opened above instead of leaving it
        # to the garbage collector.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1761
# Registry of the *open() methods: maps a compression type to the name
# of the classmethod that opens archives of that type.
OPEN_METH = {
    # uncompressed tar
    "tar": "taropen",
    # gzip compressed tar
    "gz": "gzopen",
    # bzip2 compressed tar
    "bz2": "bz2open",
}
1768
1769 #--------------------------------------------------------------------------
1770 # The public methods which TarFile provides:
1771
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up
       to a multiple of RECORDSIZE.

       Calling close() on an already closed TarFile does nothing. If
       writing the trailing blocks fails, the exception propagates, but
       a file object owned by the TarFile is still closed and the
       TarFile is marked as closed, so no file handle is leaked.
    """
    if self.closed:
        return

    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Runs on success and on failure alike: release the file
        # object unless it belongs to the caller.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1791
def getmember(self, name):
    """Look up the archive member called name and return its TarInfo
       object. KeyError is raised when the lookup done by _getmember()
       yields nothing.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001802
def getmembers(self):
    """Return the list of members (self.members). If the archive has
       not been loaded completely yet, it is scanned to the end first.
    """
    self._check()
    if self._loaded:
        return self.members
    # The member list is only complete after the whole archive has
    # been read.
    self._load()
    return self.members
1812
def getnames(self):
    """Return the names of all members as a list, in the order in
       which getmembers() returns the members.
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001818
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing the file name, or the open
       file fileobj (via os.fstat on its file descriptor, in which case
       fileobj.name replaces name). arcname, if given, is the name used
       for the member inside the archive. The result can be adjusted
       before it is passed to addfile(). None is returned for file
       types that are not supported.
    """
    self._check("aw")

    # A file object always wins over an explicit name.
    from_fileobj = fileobj is not None
    if from_fileobj:
        name = fileobj.name

    # Derive the member name: drop a drive prefix, use forward
    # slashes only and make the path relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Pick fstat, lstat or stat depending on the source, on the
    # platform and on whether symlinks are to be followed.
    if from_fileobj:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    linkname = ""
    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference and
                            statres.st_nlink > 1 and
                            inode in self.inodes and
                            arcname != self.inodes[inode])
        if already_archived:
            # A further name of a file that is in the archive
            # already: store it as a hard link.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            member_type = REGTYPE
            # Only remember inodes that are valid (the inode number
            # is always 0 on win32).
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything the stat result provides into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if member_type == REGTYPE else long(0)
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname

    # Symbolic owner names are optional: skip them when the database
    # module is missing or has no entry for the id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1916
1917 def list(self, verbose=True):
1918 """Print a table of contents to sys.stdout. If `verbose' is False, only
1919 the names of the members are printed. If it is True, an `ls -l'-like
1920 output is produced.
1921 """
1922 self._check()
1923
1924 for tarinfo in self:
1925 if verbose:
1926 print filemode(tarinfo.mode),
1927 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1928 tarinfo.gname or tarinfo.gid),
1929 if tarinfo.ischr() or tarinfo.isblk():
1930 print "%10s" % ("%d,%d" \
1931 % (tarinfo.devmajor, tarinfo.devminor)),
1932 else:
1933 print "%10d" % tarinfo.size,
1934 print "%d-%02d-%02d %02d:%02d:%02d" \
1935 % time.localtime(tarinfo.mtime)[:6],
1936
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001937 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001938
1939 if verbose:
1940 if tarinfo.issym():
1941 print "->", tarinfo.linkname,
1942 if tarinfo.islnk():
1943 print "link to", tarinfo.linkname,
1944 print
1945
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file name to the archive. name may be any type of file
       (directory, fifo, symbolic link, etc.). arcname, if given, is
       the name the file gets inside the archive. Directories are added
       together with their contents unless recursive is False.
       exclude is a deprecated function that is called with each
       filename and returns True for files that are to be left out.
       filter is a function that receives a TarInfo object and returns
       the (possibly changed) TarInfo object, or None to leave the
       member out of the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Deprecated way of leaving out pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                      DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Describe the file with a TarInfo object.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Let the filter modify or reject the member.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are written with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else consists of a header only; a directory is
    # followed by its entries.
    is_directory = tarinfo.isdir()
    self.addfile(tarinfo)
    if is_directory and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry),
                     os.path.join(arcname, entry),
                     recursive, exclude, filter)
2006
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by the TarInfo object tarinfo to
       the archive. If fileobj is given, tarinfo.size bytes are copied
       from it into the archive after the header. TarInfo objects can
       be created with gettarinfo(). On Windows platforms fileobj
       should always be opened with mode 'rb' to avoid irritation about
       the file size.
    """
    self._check("aw")

    # Work on a copy, so the caller's object and the stored member
    # are independent of each other.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    if fileobj is not None:
        # Copy the data and pad it with NULs to a full block.
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, rest = divmod(tarinfo.size, BLOCKSIZE)
        if rest > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - rest))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002032
def extractall(self, path=".", members=None):
    """Extract the members of the archive below the directory path
       (the current working directory by default) and set owner,
       modification time and permissions of directories afterwards.
       members is optional and must be a subset of the list returned
       by getmembers(); without it every member is extracted.
    """
    if members is None:
        members = self

    directories = []
    for tarinfo in members:
        if tarinfo.isdir():
            # Remember the real attributes, but create the directory
            # with a safe mode for now.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path)

    # Walk the directories in reverse name order.
    ordered = sorted(directories, key=operator.attrgetter('name'))
    for tarinfo in reversed(ordered):
        dirpath = os.path.join(path, tarinfo.name)
        # Now apply the attributes recorded in the archive.
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as exc:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % exc)
2069
def extract(self, member, path=""):
    """Extract a member from the archive below the directory path (the
       current working directory by default), using its full name.
       member may be a filename or a TarInfo object. Depending on
       self.errorlevel, errors are either raised or only reported
       through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) else member

    # makelink() needs to know where the target of a hard link is
    # located below path.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2102
def extractfile(self, member):
    """Return a file object for a member of the archive. member may be
       a filename or a TarInfo object. For a regular file (and for a
       member of an unknown type, which is treated like one) a
       file-like object is returned. For a link, the file-like object
       of the link's target is returned. For every other kind of member
       the result is None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) else member

    # Regular files and members of an unknown type are both served
    # as plain file objects.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices etc. have no data, so there is nothing
    # to return a file object for.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # Resolving a (sym)link requires access to another member,
        # which is refused for an archive read through a _Stream.
        raise StreamError("cannot extract (sym)link as file object")

    # The file object of a (sym)link is the one of its target.
    return self.extractfile(self._find_link_target(tarinfo))
2140
def _extract_member(self, tarinfo, targetpath):
    """Create the file system object described by the TarInfo object
       tarinfo at the location targetpath and apply owner, mode and
       modification time to it.
    """
    # Turn the archive pathname into a platform pathname: no trailing
    # slash and platform specific separators.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are missing are not part of the
    # archive; they are created with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Hand over to the make*() method for the member's type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2182
2183 #--------------------------------------------------------------------------
2184 # Below are the different file methods. They are called via
2185 # _extract_member() when extract() is called. They can be replaced in a
2186 # subclass to implement other functionality.
2187
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. A directory that exists
       already is not treated as an error.
    """
    try:
        # The directory starts out with a safe mode; the mode from
        # the archive is applied later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno != errno.EEXIST:
            raise
2198
def makefile(self, tarinfo, targetpath):
    """Write the data of the member tarinfo into the file targetpath.
    """
    reader = self.extractfile(tarinfo)
    try:
        with bltn_open(targetpath, "wb") as writer:
            copyfileobj(reader, writer)
    finally:
        # The member's file object is closed even if writing the
        # target fails.
        reader.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002208
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of an unknown type to targetpath as if it
       were a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, extracted as regular file."
               % tarinfo.type)
    self._dbg(1, message)
2216
def makefifo(self, tarinfo, targetpath):
    """Create the fifo targetpath. ExtractError is raised on systems
       without os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002224
def makedev(self, tarinfo, targetpath):
    """Create the character or block device targetpath. ExtractError
       is raised on systems without os.mknod() or os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits of the member with the matching
    # device type flag.
    type_bits = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    mode = tarinfo.mode | type_bits

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2239
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link targetpath. If links are not
       supported by the platform, or the target of a hard link does
       not exist in the file system, the referenced member is extracted
       to targetpath instead.
    """
    if not (hasattr(os, "symlink") and hasattr(os, "link")):
        # No link support at all: fall back to a copy of the target.
        try:
            self._extract_member(self._find_link_target(tarinfo), targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
        return

    if tarinfo.issym():
        # Replace whatever occupies the place of the new symlink.
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.symlink(tarinfo.linkname, targetpath)
        return

    # Hard link; _link_target has been prepared by extract().
    if not os.path.exists(tarinfo._link_target):
        self._extract_member(self._find_link_target(tarinfo), targetpath)
        return

    if os.path.lexists(targetpath):
        os.unlink(targetpath)
    os.link(tarinfo._link_target, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002264
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo. Nothing is
       done unless the process runs with an effective user id of 0.
       ExtractError is raised if the owner cannot be changed.
    """
    # Changing ownership is only attempted as root.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Prefer the symbolic names and fall back to the numeric ids.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        g = tarinfo.gid
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        u = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002286
def chmod(self, tarinfo, targetpath):
    """Set the file permissions of targetpath to tarinfo.mode.
       Nothing is done on systems without os.chmod(). ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002295
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to
       tarinfo.mtime. Nothing is done on systems without os.utime().
       ExtractError is raised if the times cannot be changed.
    """
    if hasattr(os, 'utime'):
        stamp = tarinfo.mtime
        try:
            os.utime(targetpath, (stamp, stamp))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002305
2306 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, or
       None if there is no further member. A member that was read
       successfully is appended to self.members. ReadError is raised
       for a header that cannot be processed at offset 0 and for a bad
       subsequent header.
    """
    self._check("ra")

    # A member stored in self.firstmember is handed out first.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Continue reading at the current offset.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                # Step over the block and try the next one.
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        break

    if tarinfo is None:
        # No member could be read: the archive is complete.
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002352
2353 #--------------------------------------------------------------------------
2354 # Little helper methods:
2355
def _getmember(self, name, tarinfo=None, normalize=False):
    """Search the members from the last one to the first one for a
       member called name and return it, or None if there is no match.
       If tarinfo is given, only the members in front of it are
       searched. With normalize set, both names are passed through
       os.path.normpath() before they are compared.
    """
    # getmembers() makes sure that the whole archive has been read.
    candidates = self.getmembers()

    # Cut the list off in front of the given member.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

    for candidate in reversed(candidates):
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)
        if name == candidate_name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002378
def _load(self):
    """Walk through the whole archive by calling next() until it
       returns None, then mark the TarFile as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2388
def _check(self, mode=None):
    """Raise IOError if the TarFile has been closed, or if `mode` is
       given and the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None or self.mode in mode:
        return
    raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002397
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       `tarinfo` points to. Raise KeyError if no such member is found.
    """
    if tarinfo.issym():
        # The link name is joined onto the directory part of the
        # symlink's own name; empty components are dropped. The whole
        # archive is searched.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(part for part in parts if part)
        limit = None
    else:
        # A hard link is just a reference to an already archived file,
        # so only the members before the link are searched.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2416
def __iter__(self):
    """Return an iterator over the archive members: a TarIter while
       the archive is not yet fully loaded, otherwise a plain iterator
       over the members list.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2424
def _dbg(self, level, msg):
    """Print `msg` to sys.stderr when `level` does not exceed the
       TarFile's debug setting.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002430
def __enter__(self):
    """Enter a `with` block: run _check() (which raises if the
       TarFile cannot be used) and hand back the TarFile itself.
    """
    self._check()
    return self
2434
def __exit__(self, type, value, traceback):
    """Leave a `with` block. Without a pending exception the TarFile
       is closed normally; otherwise only the underlying file object
       is closed (unless it was supplied externally) and the TarFile
       is flagged as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. close() must not be called here because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002444# class TarFile
2445
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Bind the iterator to `tarfile` and start at position 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, reading it with TarFile's next()
           method when it is not in the members list yet. Once all
           members have been read, flag the TarFile as _loaded and
           raise StopIteration.
        """
        # Fix for SF #1100429: getmembers() may be called during
        # iteration. The iterator therefore tracks its own position
        # in the members list so that it does not stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            entry = archive.next()
        elif self.index < len(archive.members):
            entry = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            entry = archive.next()
            if not entry:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return entry
2483
2484# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range that starts at `offset`
       and is `size` long.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # True when offset lies in [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
2493
class _data(_section):
    """A data section of a sparse file: a _section that additionally
       records `realpos`.
    """
    def __init__(self, offset, size, realpos):
        # Let the base class store offset and size.
        _section.__init__(self, offset, size)
        self.realpos = realpos
2500
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section
       and only serves to tell holes apart from data sections.
    """
    pass
2505
class _ringbuffer(list):
    """List of sections that remembers where the last successful
       lookup ended, so that the next lookup starts from there.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains `offset`, searching
           circularly from the position of the previous hit. Return
           None if no item contains it or the buffer is empty.
        """
        count = len(self)
        if not count:
            # Nothing to search; indexing an empty list would raise
            # IndexError instead of reporting "not found".
            return None

        start = self.idx
        idx = start
        while True:
            item = self[idx]
            if offset in item:
                # Remember the hit for the next lookup.
                self.idx = idx
                return item
            idx += 1
            if idx == count:
                # Wrap around to the beginning.
                idx = 0
            if idx == start:
                # End of File: every item was checked once.
                return None
2526
2527#---------------------------------------------
2528# zipfile compatible TarFile class
2529#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file` as a plain (TAR_PLAIN) or gzip-compressed
           (TAR_GZIPPED) archive. Any other compression constant
           raises ValueError.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)

        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            # Give every member the attribute names used by zipfile.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to the underlying TarFile's list() method.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return None

    def getinfo(self, name):
        """Return the member called `name`.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called `name`.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add `filename` to the archive under `arcname`.
           `compress_type` is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes` as a member whose name and
           modification time are taken from `zinfo`.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar

        info = TarInfo(zinfo.filename)
        info.size = len(bytes)
        info.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(info, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2580
2581#--------------------
2582# exported functions
2583#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    try:
        # Opening and immediately closing the archive is the whole
        # check; a TarError at either step means "not a tar file".
        open(name).close()
    except TarError:
        return False
    else:
        return True
2594
# Keep a reference to the builtin open() under another name first,
# because the next statement rebinds the module-level name `open`.
bltn_open = open
# Export TarFile.open as this module's open().
open = TarFile.open