blob: 16a6e86dce1cd0264b1382b7e31c2a0e823bcc17 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
Senthil Kumaran4af1c6a2011-07-28 22:30:27 +080033__version__ = "$Revision: 85213 $"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000034# $Source$
35
Lars Gustäbelc64e4022007-03-13 10:47:19 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000053import re
Brett Cannon132fc542008-08-04 21:23:07 +000054import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055
56try:
57 import grp, pwd
58except ImportError:
59 grp = pwd = None
60
61# from tarfile import *
62__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
63
64#---------------------------------------------------------
65# tar constants
66#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8 bytes of the header's magic + version
# fields.  The GNU variant is "ustar" followed by TWO spaces and a NUL;
# with a single space the string is one byte short and never matches.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the single-character type flag in a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers for the archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000099
100#---------------------------------------------------------
101# tarfile constants
102#---------------------------------------------------------
# Every member type that tarfile knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that only exist in the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Keywords of a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header keywords whose values are numbers, mapped to the type used
# to convert them; every other keyword is treated as a string.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
132
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000133#---------------------------------------------------------
134# Bits used in the mode field, values in octal.
135#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000       # symbolic link
S_IFREG = 0o100000       # regular file
S_IFBLK = 0o060000       # block device
S_IFDIR = 0o040000       # directory
S_IFCHR = 0o020000       # character device
S_IFIFO = 0o010000       # fifo

# Special permission bits.
TSUID = 0o4000           # set UID on execution
TSGID = 0o2000           # set GID on execution
TSVTX = 0o1000           # reserved

# Owner / group / other permission bits.
TUREAD = 0o400           # read by owner
TUWRITE = 0o200          # write by owner
TUEXEC = 0o100           # execute/search by owner
TGREAD = 0o040           # read by group
TGWRITE = 0o020          # write by group
TGEXEC = 0o010           # execute/search by group
TOREAD = 0o004           # read by other
TOWRITE = 0o002          # write by other
TOEXEC = 0o001           # execute/search by other
156
157#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000158# initialization
159#---------------------------------------------------------
# Prefer the filesystem encoding; fall back to the interpreter default
# when the platform does not report one.
ENCODING = sys.getfilesystemencoding()
ENCODING = sys.getdefaultencoding() if ENCODING is None else ENCODING
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000163
164#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000165# Some useful functions
166#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000167
def stn(s, length):
    """Return s cut to at most length characters and padded with NUL
       characters up to exactly length characters.
    """
    # A negative repeat count yields an empty string, so over-long
    # input is only truncated.
    padding = NUL * (length - len(s))
    return s[:length] + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000172
def nts(s):
    """Return the part of s that precedes its first null character,
       or all of s if it contains none.
    """
    head, _, _ = s.partition("\0")
    return head
181
def nti(s):
    """Convert a number field to a python number.

       Two encodings are understood (see itn()): a string of octal
       digits terminated by a null character, or, when the first
       character is chr(0o200), a big-endian base-256 number stored in
       the remaining characters.

       An empty field is read as 0, the same as a field that consists
       only of null characters.  Raises InvalidHeaderError if an octal
       field cannot be parsed.
    """
    if not s:
        # Indexing s[0] below would raise IndexError on an empty field.
        return 0

    if s[0] != chr(0o200):
        try:
            return int(nts(s) or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: fold the characters after the marker, most significant
    # first.
    n = 0
    for char in s[1:]:
        n = (n << 8) + ord(char)
    return n
198
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode the python number n as a number field of digits characters.

       Raises ValueError if n does not fit into the field.
    """
    # POSIX 1003.1-1988 stores numbers as octal digits followed by a
    # null character, which covers 0 .. (8**(digits-1))-1.  GNU tar can
    # store other values as a 0o200 marker byte followed by digits-1
    # bytes in big-endian order, which covers up to (256**(digits-1))-1.
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, then flip them.
    payload = []
    for _ in range(width):
        payload.append(chr(n & 0o377))
        n >>= 8
    payload.reverse()
    return chr(0o200) + "".join(payload)
225
def uts(s, encoding, errors):
    """Encode the unicode object s with encoding and return the result.

       errors is passed to the codec, except for the extra value
       "utf-8": in that case every character that encoding cannot
       represent is stored as its UTF-8 representation instead (similar
       to the -o invalid=UTF-8 option in POSIX.1-2001).
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # At least one character is not encodable: go character by character.
    encoded = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        encoded.append(piece)
    return "".join(encoded)
245
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the header
       block in buf.

       All bytes of the 512 byte block are summed up, except for the
       8 byte chksum field (offsets 148-155), which counts as if it was
       filled with spaces (8 * 32 == 256). According to the GNU tar
       sources, some tars (Sun and NeXT) sum up signed chars, which
       gives a different result as soon as a byte has its high bit set,
       so both variants are calculated.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself.
    unsigned_fields = struct.unpack("148B8x356B", header)
    signed_fields = struct.unpack("148b8x356b", header)
    return 256 + sum(unsigned_fields), 256 + sum(signed_fields)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000258
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, everything up to the end of src is copied.
       Raises IOError if src ends before length bytes were read.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(nbytes):
        # Move exactly nbytes or fail.
        data = src.read(nbytes)
        if len(data) < nbytes:
            raise IOError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    while full_chunks > 0:
        transfer(chunk_size)
        full_chunks -= 1

    if tail != 0:
        transfer(tail)
    return
283
# One inner tuple per character of the mode string. Each inner tuple
# lists (bits, character) candidates in order of priority; the first
# candidate whose bits are all set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)

def filemode(mode):
    """Return the string representation of a file's mode in the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        matching = [char for bit, char in candidates if mode & bit == bit]
        # No candidate matched: the position is shown as "-".
        chars.append(matching[0] if matching else "-")
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000359
360#---------------------------
361# internal stream interface
362#---------------------------
class _LowLevelFile:
    """Minimal file object on top of an os-level file descriptor.
       It offers read(), write() and close() and is used instead of
       a regular file object for streaming access.
    """

    def __init__(self, name, mode):
        # Only "r" and "w" are known, anything else is a KeyError.
        if mode == "r":
            flags = os.O_RDONLY
        elif mode == "w":
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        else:
            raise KeyError(mode)
        # O_BINARY is not defined on every platform.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
386
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; used to open the file when fileobj
                       is None and stored in the gzip header on write
           mode     -- "r" for reading, anything else is treated as
                       writing
           comptype -- "tar" (no compression), "gz", "bz2" or "*" to
                       detect the compression from the stream
           fileobj  -- stream-like object, or None to open name
           bufsize  -- number of bytes per access to fileobj

           Raises CompressionError if the module needed for comptype
           is not available.
        """
        # Only a file that was opened here is closed in close().
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""           # raw bytes read from / to be written to fileobj
        self.pos = 0            # position in the uncompressed stream
        self.closed = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("") & 0xffffffff
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # "closed" is missing if __init__ failed early.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        # magic, deflate method, FNAME flag, mtime, extra flags, OS
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if type(self.name) is unicode:
            self.name = self.name.encode("iso-8859-1", "replace")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffff
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long.  So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Consumes the gzip header. Raises ReadError if the magic
           number is wrong and CompressionError if the compression
           method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)          # mtime, extra flags, OS

        if flag & 4:
            # FEXTRA: the extra field belongs to the uncompressed gzip
            # header, so it has to be skipped with the raw __read().
            # read() would push these bytes through the decompressor
            # and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: null-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: null-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: header checksum
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Moving forward means reading and discarding the data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress raw blocks until size bytes are available; the
        # surplus is kept in self.dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except IOError:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
615
class _StreamProxy(object):
    """Wrapper around a file object that looks at the first block
       of data in order to detect the compression of a stream
       (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        # The first block is kept so that it can be handed out again.
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # The first call returns the block that was read in __init__,
        # regardless of size. Every following call goes straight to
        # the wrapped file object.
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        head = self.buf
        is_gzip = head[:3] == "\037\213\010"
        is_bzip2 = head[0:3] == "BZh" and head[4:10] == "1AY&SY"
        if is_gzip:
            return "gz"
        if is_bzip2:
            return "bz2"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
639
class _BZ2Proxy(object):
    """Wrapper that compresses or decompresses bzip2 data on its way
       to or from an external file object, for the "r:bz2" and "w:bz2"
       modes. This is a workaround for a limitation in bz2 module's
       BZ2File class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start at position 0 with a fresh (de)compressor.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = ""

    def read(self, size):
        """Return up to size bytes of decompressed data.
        """
        pieces = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            data = self.bz2obj.decompress(raw)
            pieces.append(data)
            available += len(data)
        decompressed = "".join(pieces)

        result = decompressed[:size]
        # Whatever exceeds size is kept for the next call.
        self.buf = decompressed[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        # Going backwards is only possible by starting over.
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        return self.pos

    def write(self, data):
        self.pos += len(data)
        self.fileobj.write(self.bz2obj.compress(data))

    def close(self):
        # Only the pending compressed data is written out here.
        if self.mode == "w":
            self.fileobj.write(self.bz2obj.flush())
# class _BZ2Proxy
701
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000702#------------------------
703# Extraction file object
704#------------------------
class _FileInFile(object):
    """File-like view on size bytes of an existing file object,
       starting at offset. If a sparse map is given, reads are
       resolved through it.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file. At most size bytes are returned
           and never more than what is left up to the end.
        """
        available = self.size - self.position
        if size is None:
            size = available
        else:
            size = min(size, available)

        if self.sparse is None:
            return self.readnormal(size)
        return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = self.readsparsesection(remaining)
            if not chunk:
                break
            remaining -= len(chunk)
            chunks.append(chunk)
        return "".join(chunks)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return ""

        # Never read beyond the end of the current section.
        section_end = section.offset + section.size
        size = min(size, section_end - self.position)

        if not isinstance(section, _data):
            # Any other kind of section reads as null characters.
            self.position += size
            return NUL * size

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += size
        return self.fileobj.read(size)
#class _FileInFile
779
780
class ExFileObject(object):
    """Read-only file-like object for the data of an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        sparse = getattr(tarinfo, "sparse", None)
        self.fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                                   tarinfo.size, sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has fetched but not handed out yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        read_all = size is None

        # Buffered data is handed out first.
        data = ""
        if self.buffer:
            if read_all:
                data, self.buffer = self.buffer, ""
            else:
                data = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if read_all:
            data += self.fileobj.read()
        else:
            data += self.fileobj.read(size - len(data))

        self.position += len(data)
        return data

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        newline_at = self.buffer.find("\n")
        if newline_at < 0:
            # Fetch blocks until one has a newline or the data ends.
            pieces = [self.buffer]
            chunk = self.fileobj.read(self.blocksize)
            pieces.append(chunk)
            while chunk and "\n" not in chunk:
                chunk = self.fileobj.read(self.blocksize)
                pieces.append(chunk)
            self.buffer = "".join(pieces)
            newline_at = self.buffer.find("\n")

        if newline_at >= 0:
            end = newline_at + 1
        else:
            # no newline found.
            end = len(self.buffer)

        if size != -1:
            end = min(size, end)

        line = self.buffer[:end]
        self.buffer = self.buffer[end:]
        self.position += len(line)
        return line

    def readlines(self):
        """Return a list with all remaining lines.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position
           is kept within 0 and the size of the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                target = max(self.position + pos, 0)
            else:
                target = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            target = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.position = target
        # Buffered data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        line = self.readline()
        while line:
            yield line
            line = self.readline()
#class ExFileObject
908
909#------------------
910# Exported Classes
911#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 07777, a directory name gets a
           trailing slash, and unicode values of the string fields are
           encoded with the given encoding and error handler.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        for key in ("name", "linkname", "uname", "gname"):
            # isinstance() so that unicode subclasses are encoded as well.
            if isinstance(info[key], unicode):
                info[key] = info[key].encode(encoding, errors)

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
           or PAX_FORMAT); any other value raises ValueError.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname is longer than LENGTH_LINK
           or the name cannot be split into prefix and name parts.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT)

    def create_gnu_header(self, info):
        """Return the object as a GNU header block sequence.

           Over-long link names and names are emitted as extra
           GNUTYPE_LONGLINK / GNUTYPE_LONGNAME members in front of the
           regular header.
        """
        info["magic"] = GNU_MAGIC

        buf = ""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)

        return buf + self._create_header(info, GNU_FORMAT)

    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                pax_headers[hname] = val
                continue

            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = unicode(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if there is
           no such slash or the remaining name is still too long.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # The checksum field is 8 bytes wide (frombuf() reads
            # buf[148:156]); it is filled with spaces while the checksum
            # is computed. A shorter placeholder would shift every field
            # that follows.
            "        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", ""), 32),
            stn(info.get("gname", ""), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Patch the 7 bytes "%06o\0" over the start of the checksum
        # placeholder; its last space is kept.
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name += NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be unicode objects.
        """
        records = []
        for keyword, value in pax_headers.iteritems():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes its own decimal representation,
            # so iterate until the total stops changing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append("%d %s=%s\n" % (p, keyword, value))
        records = "".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError
           or InvalidHeaderError if buf is empty, has the wrong length,
           consists of NUL bytes only, or carries a bad checksum.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.buf = buf
        obj.name = nts(buf[0:100])
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257])
        obj.uname = nts(buf[265:297])
        obj.gname = nts(buf[297:329])
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500])

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        lastpos = 0
        realpos = 0
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the stored size here; it is replaced by
        # the original size only after the next offset is computed.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.

           Raises InvalidHeaderError for a record with a length of
           zero and SubsequentHeaderError if the header that follows
           is missing or bad.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would leave pos unchanged, so the same
                # record would match again and the loop would never end.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.iteritems():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    # Unparsable numbers fall back to 0.
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1473# class TarInfo
1474
1475class TarFile(object):
1476 """The TarFile Class provides an interface to tar archives.
1477 """
1478
# Class-level defaults. TarFile.__init__ overrides an attribute on the
# instance only when the matching constructor argument is not None.

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.
                            # __init__ replaces None with "utf-8" in
                            # mode "r" and with "strict" otherwise.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The default ExFileObject class to use.
1500
1501 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1502 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001503 errors=None, pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001504 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1505 read from an existing archive, 'a' to append data to an existing
1506 file or 'w' to create a new file overwriting an existing one. `mode'
1507 defaults to 'r'.
1508 If `fileobj' is given, it is used for reading or writing data. If it
1509 can be determined, `mode' is overridden by `fileobj's mode.
1510 `fileobj' is not closed, when TarFile is closed.
1511 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001512 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001513 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001514 self.mode = mode
1515 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001516
1517 if not fileobj:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001518 if self.mode == "a" and not os.path.exists(name):
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001519 # Create nonexistent files in append mode.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001520 self.mode = "w"
1521 self._mode = "wb"
Brett Cannon6cef0762007-05-25 20:17:15 +00001522 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001523 self._extfileobj = False
1524 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001525 if name is None and hasattr(fileobj, "name"):
1526 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001527 if hasattr(fileobj, "mode"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001528 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001529 self._extfileobj = True
Lars Gustäbel0f4a14b2007-08-28 12:31:09 +00001530 self.name = os.path.abspath(name) if name else None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001531 self.fileobj = fileobj
1532
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001533 # Init attributes.
1534 if format is not None:
1535 self.format = format
1536 if tarinfo is not None:
1537 self.tarinfo = tarinfo
1538 if dereference is not None:
1539 self.dereference = dereference
1540 if ignore_zeros is not None:
1541 self.ignore_zeros = ignore_zeros
1542 if encoding is not None:
1543 self.encoding = encoding
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001544
1545 if errors is not None:
1546 self.errors = errors
1547 elif mode == "r":
1548 self.errors = "utf-8"
1549 else:
1550 self.errors = "strict"
1551
1552 if pax_headers is not None and self.format == PAX_FORMAT:
1553 self.pax_headers = pax_headers
1554 else:
1555 self.pax_headers = {}
1556
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001557 if debug is not None:
1558 self.debug = debug
1559 if errorlevel is not None:
1560 self.errorlevel = errorlevel
1561
1562 # Init datastructures.
Georg Brandl38c6a222006-05-10 16:26:03 +00001563 self.closed = False
1564 self.members = [] # list of members as TarInfo objects
1565 self._loaded = False # flag if all members have been read
Lars Gustäbel77b2d632007-12-01 21:02:12 +00001566 self.offset = self.fileobj.tell()
1567 # current position in the archive file
Georg Brandl38c6a222006-05-10 16:26:03 +00001568 self.inodes = {} # dictionary caching the inodes of
1569 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001570
Lars Gustäbel355538e2009-11-18 20:24:54 +00001571 try:
1572 if self.mode == "r":
1573 self.firstmember = None
1574 self.firstmember = self.next()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001575
Lars Gustäbel355538e2009-11-18 20:24:54 +00001576 if self.mode == "a":
1577 # Move to the end of the archive,
1578 # before the first empty block.
Lars Gustäbel355538e2009-11-18 20:24:54 +00001579 while True:
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001580 self.fileobj.seek(self.offset)
1581 try:
1582 tarinfo = self.tarinfo.fromtarfile(self)
1583 self.members.append(tarinfo)
1584 except EOFHeaderError:
1585 self.fileobj.seek(self.offset)
Lars Gustäbel355538e2009-11-18 20:24:54 +00001586 break
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001587 except HeaderError, e:
1588 raise ReadError(str(e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001589
Lars Gustäbel355538e2009-11-18 20:24:54 +00001590 if self.mode in "aw":
1591 self._loaded = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001592
Lars Gustäbel355538e2009-11-18 20:24:54 +00001593 if self.pax_headers:
1594 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1595 self.fileobj.write(buf)
1596 self.offset += len(buf)
1597 except:
1598 if not self._extfileobj:
1599 self.fileobj.close()
1600 self.closed = True
1601 raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001602
def _getposix(self):
    """Tell whether the archive format is USTAR_FORMAT."""
    return self.format == USTAR_FORMAT
def _setposix(self, value):
    """Deprecated way of choosing the format: a true value selects
       USTAR_FORMAT, a false value GNU_FORMAT. A DeprecationWarning
       is issued on every assignment.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT
posix = property(_getposix, _setposix)
1614
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001615 #--------------------------------------------------------------------------
1616 # Below are the classmethods which act as alternate constructors to the
1617 # TarFile class. The open() method is the only one that is needed for
1618 # public use; it is the "super"-constructor and is able to select an
1619 # adequate "sub"-constructor for a particular compression using the mapping
1620 # from OPEN_METH.
1621 #
1622 # This concept allows one to subclass TarFile without losing the comfort of
1623 # the super-constructor. A sub-constructor is registered and made available
1624 # by adding it to the mapping in OPEN_METH.
1625
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or the
       mode cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no opener accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: a substring test against "rw" would also
        # accept the two-character mode "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        t._extfileobj = False
        return t

    # Exact comparison: a substring test against "aw" would also accept
    # "" and "aw" and hand them on to taropen().
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001698
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. All remaining arguments are handed to the class
       constructor unchanged.
    """
    # Compare against the exact modes: a substring test against "raw"
    # would also accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001706
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module cannot be used and ReadError
       if opening the archive fails with an IOError.
    """
    # Exact comparison; a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Only a file object opened here may be closed here on failure;
    # a caller-supplied one must stay open (open() rewinds and reuses it).
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    t = None
    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        raise ReadError("not a gzip file")
    finally:
        # t is still None whenever opening failed, whatever the error.
        if t is None and opened_here:
            fileobj.close()
    t._extfileobj = False
    return t
1732
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing and ReadError if
       opening the archive fails with an IOError or EOFError.
    """
    # Exact comparison; a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File opened here is closed here on failure; a
    # caller-supplied file object is wrapped and left untouched.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    t = None
    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        raise ReadError("not a bzip2 file")
    finally:
        # t is still None whenever opening failed, whatever the error.
        if t is None and opened_here:
            fileobj.close()
    t._extfileobj = False
    return t
1757
# Registry of the *open() methods: maps a compression type to the
# name of the classmethod that opens archives of that type.
OPEN_METH = {
    "tar": "taropen",
    "gz": "gzopen",
    "bz2": "bz2open",
}
1764
1765 #--------------------------------------------------------------------------
1766 # The public methods which TarFile provides:
1767
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed as well unless it was supplied
       from outside (self._extfileobj).
    """
    if self.closed:
        return

    # Mark the archive closed first and release the file object in a
    # finally clause, so a failing write does not leave it open.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1787
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised when the lookup via _getmember() yields
       nothing. For a name that occurs several times, the last
       occurrence is taken to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001798
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in
       the same order as they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires scanning the whole archive once.
    self._load()
    return self.members
1808
def getnames(self):
    """Return the names of all archive members as a list, ordered
       like the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001814
1815 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1816 """Create a TarInfo object for either the file `name' or the file
1817 object `fileobj' (using os.fstat on its file descriptor). You can
1818 modify some of the TarInfo's attributes before you add it using
1819 addfile(). If given, `arcname' specifies an alternative name for the
1820 file in the archive.
1821 """
1822 self._check("aw")
1823
1824 # When fileobj is given, replace name by
1825 # fileobj's real name.
1826 if fileobj is not None:
1827 name = fileobj.name
1828
1829 # Building the name of the member in the archive.
1830 # Backward slashes are converted to forward slashes,
1831 # Absolute paths are turned to relative paths.
1832 if arcname is None:
1833 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001834 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001835 arcname = arcname.replace(os.sep, "/")
1836 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001837
1838 # Now, fill the TarInfo object with
1839 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001840 tarinfo = self.tarinfo()
1841 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001842
1843 # Use os.stat or os.lstat, depending on platform
1844 # and if symlinks shall be resolved.
1845 if fileobj is None:
1846 if hasattr(os, "lstat") and not self.dereference:
1847 statres = os.lstat(name)
1848 else:
1849 statres = os.stat(name)
1850 else:
1851 statres = os.fstat(fileobj.fileno())
1852 linkname = ""
1853
1854 stmd = statres.st_mode
1855 if stat.S_ISREG(stmd):
1856 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001857 if not self.dereference and statres.st_nlink > 1 and \
1858 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001859 # Is it a hardlink to an already
1860 # archived file?
1861 type = LNKTYPE
1862 linkname = self.inodes[inode]
1863 else:
1864 # The inode is added only if its valid.
1865 # For win32 it is always 0.
1866 type = REGTYPE
1867 if inode[0]:
1868 self.inodes[inode] = arcname
1869 elif stat.S_ISDIR(stmd):
1870 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001871 elif stat.S_ISFIFO(stmd):
1872 type = FIFOTYPE
1873 elif stat.S_ISLNK(stmd):
1874 type = SYMTYPE
1875 linkname = os.readlink(name)
1876 elif stat.S_ISCHR(stmd):
1877 type = CHRTYPE
1878 elif stat.S_ISBLK(stmd):
1879 type = BLKTYPE
1880 else:
1881 return None
1882
1883 # Fill the TarInfo object with all
1884 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001885 tarinfo.name = arcname
1886 tarinfo.mode = stmd
1887 tarinfo.uid = statres.st_uid
1888 tarinfo.gid = statres.st_gid
Lars Gustäbel2ee9c6f2010-06-03 09:56:22 +00001889 if type == REGTYPE:
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001890 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001891 else:
1892 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001893 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001894 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001895 tarinfo.linkname = linkname
1896 if pwd:
1897 try:
1898 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1899 except KeyError:
1900 pass
1901 if grp:
1902 try:
1903 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1904 except KeyError:
1905 pass
1906
1907 if type in (CHRTYPE, BLKTYPE):
1908 if hasattr(os, "major") and hasattr(os, "minor"):
1909 tarinfo.devmajor = os.major(statres.st_rdev)
1910 tarinfo.devminor = os.minor(statres.st_rdev)
1911 return tarinfo
1912
1913 def list(self, verbose=True):
1914 """Print a table of contents to sys.stdout. If `verbose' is False, only
1915 the names of the members are printed. If it is True, an `ls -l'-like
1916 output is produced.
1917 """
1918 self._check()
1919
1920 for tarinfo in self:
1921 if verbose:
1922 print filemode(tarinfo.mode),
1923 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1924 tarinfo.gname or tarinfo.gid),
1925 if tarinfo.ischr() or tarinfo.isblk():
1926 print "%10s" % ("%d,%d" \
1927 % (tarinfo.devmajor, tarinfo.devminor)),
1928 else:
1929 print "%10d" % tarinfo.size,
1930 print "%d-%02d-%02d %02d:%02d:%02d" \
1931 % time.localtime(tarinfo.mtime)[:6],
1932
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001933 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001934
1935 if verbose:
1936 if tarinfo.issym():
1937 print "->", tarinfo.linkname,
1938 if tarinfo.islnk():
1939 print "link to", tarinfo.linkname,
1940 print
1941
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. Any file type is accepted
       (directory, fifo, symbolic link, etc.). `arcname', if given, is
       the name used inside the archive instead of `name'. Directories
       are added together with their contents unless `recursive' is
       False. `exclude' is a deprecated function that is called with a
       filename and returns True for files to leave out. `filter' is a
       function that takes a TarInfo object and returns the (possibly
       changed) TarInfo object, or None to leave the member out.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Deprecated name-based exclusion.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                      DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Describe the file.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Let the caller change or reject the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are written as header plus data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is a header only; directories are descended
    # into afterwards when requested.
    is_directory = tarinfo.isdir()
    self.addfile(tarinfo)
    if is_directory and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry),
                     os.path.join(arcname, entry),
                     recursive, exclude, filter)
2002
def addfile(self, tarinfo, fileobj=None):
    """Append the TarInfo object `tarinfo' to the archive. When
       `fileobj' is given, tarinfo.size bytes are read from it and
       stored as the member's data. TarInfo objects can be obtained
       from gettarinfo(). On Windows platforms, `fileobj' should
       always be opened with mode 'rb' to avoid irritation about the
       file size.
    """
    self._check("aw")

    # Work on a copy so the caller's object is not shared.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Member data, padded with NULs up to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002028
def extractall(self, path=".", members=None):
    """Extract every member of the archive below `path' (the current
       working directory by default). Owner, modification time and
       permissions of directories are applied only after everything
       has been extracted. `members' is optional and must be a subset
       of the list returned by getmembers().

       NOTE(review): member names are joined onto `path' as they are;
       archives from untrusted sources should be inspected first.
    """
    deferred = []

    if members is None:
        members = self

    for member in members:
        if member.isdir():
            # Create the directory with a safe mode for now; the real
            # attributes are applied in the second pass.
            deferred.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        self.extract(member, path)

    # Process directories in reverse name order.
    deferred.sort(key=lambda entry: entry.name)
    deferred.reverse()

    # Second pass: owner, mtime and mode of the directories.
    for member in deferred:
        dirpath = os.path.join(path, member.name)
        try:
            self.chown(member, dirpath)
            self.utime(member, dirpath)
            self.chmod(member, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2065
def extract(self, member, path=""):
    """Extract one member below `path' (the current working directory
       by default), using its full name. `member' may be a filename
       or a TarInfo object. File information is restored as
       accurately as possible. Whether errors are raised or only
       reported through _dbg() depends on self.errorlevel.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) \
        else member

    # makelink() needs to know where a hard link's target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2098
def extractfile(self, member):
    """Return a file object for the data of an archive member.
       `member' may be a filename or a TarInfo object. A regular file
       yields a file-like object; a link yields the file-like object
       of the link's target; anything else yields None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) \
        else member

    # Regular files, and members of unknown type (which are treated
    # like regular files), are read directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices etc. have no data associated with them.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries to
    # extract a (sym)link as a file object from a non-seekable stream
    # of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2136
def _extract_member(self, tarinfo, targetpath):
    """Write the member described by `tarinfo' to the physical file
       `targetpath', then apply owner, mode and modification time.
    """
    # Normalise the destination: no trailing slash, platform
    # specific separators.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Missing parent directories are not part of the archive and
    # are created with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2178
2179 #--------------------------------------------------------------------------
2180 # Below are the different file methods. They are called via
2181 # _extract_member() when extract() is called. They can be replaced in a
2182 # subclass to implement other functionality.
2183
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing path
       is not treated as an error.
    """
    try:
        # Created with a safe mode; the real mode is applied later
        # by the caller.
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as e:
        if e.errno == errno.EEXIST:
            return
        raise
2194
def makefile(self, tarinfo, targetpath):
    """Write the member's data to a file called `targetpath'.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        # The source is closed even if the target cannot be written.
        source.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002204
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were
       a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
2212
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'. ExtractError is raised
       where os.mkfifo is not available.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002220
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath'.
       ExtractError is raised where os.mknod or os.makedev is not
       available.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Add the file type bit for the kind of device to the mode.
    devtype = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    mode = tarinfo.mode | devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2235
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'. Where links
       cannot be created (platform limitation), a copy of the
       referenced member is extracted instead.
    """
    if not (hasattr(os, "symlink") and hasattr(os, "link")):
        # No link support at all: extract the link's target instead.
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
        return

    if tarinfo.issym():
        # Replace whatever is in the way, dangling links included.
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.symlink(tarinfo.linkname, targetpath)
        return

    # Hard link; tarinfo._link_target is prepared by extract().
    if os.path.exists(tarinfo._link_target):
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.link(tarinfo._link_target, targetpath)
    else:
        # The target is not on disk: extract the member it refers to.
        self._extract_member(self._find_link_target(tarinfo), targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002260
def chown(self, tarinfo, targetpath):
    """Give `targetpath' the owner recorded in `tarinfo'. Nothing
       happens unless the process runs with effective uid 0.
       ExtractError is raised if the owner cannot be changed.
    """
    # Changing ownership requires root.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Names are looked up first; the numeric ids are the fallback.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        gid = tarinfo.gid
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        uid = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002282
def chmod(self, tarinfo, targetpath):
    """Apply the permissions stored in `tarinfo' to `targetpath'.
       ExtractError is raised if that fails; nothing happens where
       os.chmod is not available.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002291
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to the
       mtime stored in `tarinfo'. ExtractError is raised if that
       fails; nothing happens where os.utime is not available.
    """
    if hasattr(os, 'utime'):
        stamp = (tarinfo.mtime, tarinfo.mtime)
        try:
            os.utime(targetpath, stamp)
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002301
2302 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised for header problems at offset 0 and for
       every SubsequentHeaderError. Each member returned from the
       loop below is appended to self.members; once None is returned,
       self._loaded is set.
    """
    self._check("ra")
    # A member that was stored in self.firstmember is handed out
    # first, exactly once.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    # The loop repeats only via `continue', i.e. when a block is
    # skipped because ignore_zeros is set; every other path that does
    # not raise reaches the `break' at the bottom.
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError, e:
            if self.ignore_zeros:
                # Log the block, step over it and try the next one.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError, e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # Invalid header at the very start of the file.
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError, e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError, e:
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # No member could be read: treat the archive as fully loaded.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002348
2349 #--------------------------------------------------------------------------
2350 # Little helper methods:
2351
def _getmember(self, name, tarinfo=None, normalize=False):
    """Search the archive for a member called `name', starting with
       the last member. If `tarinfo' is given, only the members in
       front of it are searched. With `normalize' set, both names are
       passed through os.path.normpath before they are compared.
       Returns None if nothing matches.
    """
    # getmembers() makes sure all members have been loaded.
    candidates = self.getmembers()

    # Cut the search list off in front of `tarinfo'.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

    for candidate in reversed(candidates):
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)
        if candidate_name == name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002374
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002375 def _load(self):
2376 """Read through the entire archive file and look for readable
2377 members.
2378 """
2379 while True:
2380 tarinfo = self.next()
2381 if tarinfo is None:
2382 break
2383 self._loaded = True
2384
2385 def _check(self, mode=None):
2386 """Check if TarFile is still open, and if the operation's mode
2387 corresponds to TarFile's mode.
2388 """
2389 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002390 raise IOError("%s is closed" % self.__class__.__name__)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002391 if mode is not None and self.mode not in mode:
2392 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002393
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002394 def _find_link_target(self, tarinfo):
2395 """Find the target member of a symlink or hardlink member in the
2396 archive.
2397 """
2398 if tarinfo.issym():
2399 # Always search the entire archive.
Lars Gustäbel231d4742012-04-24 22:42:08 +02002400 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002401 limit = None
2402 else:
2403 # Search the archive before the link, because a hard link is
2404 # just a reference to an already archived file.
2405 linkname = tarinfo.linkname
2406 limit = tarinfo
2407
2408 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2409 if member is None:
2410 raise KeyError("linkname %r not found" % linkname)
2411 return member
2412
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002413 def __iter__(self):
2414 """Provide an iterator object.
2415 """
2416 if self._loaded:
2417 return iter(self.members)
2418 else:
2419 return TarIter(self)
2420
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002421 def _dbg(self, level, msg):
2422 """Write debugging output to sys.stderr.
2423 """
2424 if level <= self.debug:
2425 print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002426
def __enter__(self):
    """Enter the runtime context of a with statement.

    self._check() is called without a mode argument first, then the
    TarFile object itself is returned as the value that is bound by
    the "as" clause.
    """
    self._check()
    return self
2430
2431 def __exit__(self, type, value, traceback):
2432 if type is None:
2433 self.close()
2434 else:
2435 # An exception occurred. We must not call close() because
2436 # it would try to write end-of-archive blocks and padding.
2437 if not self._extfileobj:
2438 self.fileobj.close()
2439 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002440# class TarFile
2441
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object that iterates over `tarfile`,
           starting at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def next(self):
        """Return the next member of the TarFile.

           Members that are in the TarFile's member list already are
           taken from there, the others are read with the TarFile's
           next() method. When all members have been read, the
           TarFile is set as _loaded and StopIteration is raised.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            entry = archive.next()
        elif self.index < len(archive.members):
            entry = archive.members[self.index]
        elif archive._loaded:
            # Every member has been handed out already.
            raise StopIteration
        else:
            entry = archive.next()
            if not entry:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return entry
2479
2480# Helper classes for sparse file support
2481class _section:
2482 """Base class for _data and _hole.
2483 """
2484 def __init__(self, offset, size):
2485 self.offset = offset
2486 self.size = size
2487 def __contains__(self, offset):
2488 return self.offset <= offset < self.offset + self.size
2489
2490class _data(_section):
2491 """Represent a data section in a sparse file.
2492 """
2493 def __init__(self, offset, size, realpos):
2494 _section.__init__(self, offset, size)
2495 self.realpos = realpos
2496
2497class _hole(_section):
2498 """Represent a hole section in a sparse file.
2499 """
2500 pass
2501
2502class _ringbuffer(list):
2503 """Ringbuffer class which increases performance
2504 over a regular list.
2505 """
2506 def __init__(self):
2507 self.idx = 0
2508 def find(self, offset):
2509 idx = self.idx
2510 while True:
2511 item = self[idx]
2512 if offset in item:
2513 break
2514 idx += 1
2515 if idx == len(self):
2516 idx = 0
2517 if idx == self.idx:
2518 # End of File
2519 return None
2520 self.idx = idx
2521 return item
2522
2523#---------------------------------------------
2524# zipfile compatible TarFile class
2525#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open the archive `file` with TarFile.taropen() if
           `compression` is TAR_PLAIN, or with TarFile.gzopen() if
           it is TAR_GZIPPED. Any other value raises ValueError.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            # Give the members the attribute names that zipfile's
            # ZipInfo objects use.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing of the archive with TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called `name`."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called `name`."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename` to the archive as `arcname`.
           `compress_type` is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes` to the archive. The member name and
           the modification time are taken from `zinfo.filename` and
           `zinfo.date_time`.
        """
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2575#class TarFileCompat
2576
2577#--------------------
2578# exported functions
2579#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again right away. A
       TarError that is raised while doing so yields False, any
       other exception is passed on to the caller.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2590
# Keep a reference to the open() that is in effect up to this point,
# because the name is rebound on the next line.
bltn_open = open
# From here on the module-level name open refers to TarFile.open.
open = TarFile.open