blob: bc0b09fa88cd29e0c2884008e96070eaa3d373ce [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001# -*- coding: iso-8859-1 -*-
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Revision-control keywords; expanded by the version control system.
__version__ = "$Revision: 85213 $"
# $Source$
__date__ = "$Date$"
__cvsid__ = "$Id$"

# Release and authorship information.
version = "0.9.0"
__author__ = "Lars Gustäbel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
40
41#---------
42# Imports
43#---------
Serhiy Storchaka205408d2015-03-11 17:31:59 +020044from __builtin__ import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000053import re
Brett Cannon132fc542008-08-04 21:23:07 +000054import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055
56try:
57 import grp, pwd
58except ImportError:
59 grp = pwd = None
60
# Names made available by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
]
63
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# The magic and version fields of a header occupy 8 bytes together.
# GNU tar fills them with "ustar", TWO spaces and a NUL byte; POSIX
# uses "ustar", a NUL byte and the version "00".
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the type flag of a header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000099
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# File types that tarfile supports: the standard types followed by
# the GNU specific ones.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
) + GNU_TYPES

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields in a pax header that are numbers, mapped to the type used to
# convert them. All other fields are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
132
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special permission bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Permission bits for owner, group and others.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
156
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Use the file system encoding and fall back to the interpreter's
# default encoding if the former is not known.
ENCODING = sys.getfilesystemencoding()
ENCODING = sys.getdefaultencoding() if ENCODING is None else ENCODING
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000163
164#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000165# Some useful functions
166#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000167
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    The string is cut off after length characters, shorter strings
    are filled up with NUL characters on the right.
    """
    return s[:length].ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000172
def nts(s):
    """Convert a null-terminated string field to a python string.

    Everything from the first null character on is dropped. A field
    without a null character is returned completely.
    """
    head, _sep, _tail = s.partition("\0")
    return head
181
def nti(s):
    """Convert a number field to a python number.

    Raise InvalidHeaderError if the field is neither empty nor a
    valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] == chr(0o200):
        # GNU encoding: the bytes following the marker are the
        # big-endian representation of the number.
        n = long(0)
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # POSIX encoding: null-terminated octal digits, an empty field
    # counts as zero.
    digits = nts(s) or "0"
    try:
        return int(digits, 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
198
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    digits is the width of the field in bytes. Raise ValueError if
    the number does not fit into the field in the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes starting with the least significant one and
    # turn them around afterwards.
    payload = []
    for _ in xrange(width):
        payload.append(chr(n & 0o377))
        n >>= 8
    payload.reverse()
    return chr(0o200) + "".join(payload)
225
def uts(s, encoding, errors):
    """Convert a unicode object to a string.

    errors is either one of the regular error handler names, which
    is passed on to encode(), or the special value "utf-8".
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001. Replace untranslatable characters with their
    # UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # At least one character cannot be encoded, so go through the
    # string character by character.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
245
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Return the tuple (unsigned_chksum, signed_chksum).
    """
    header = buf[:512]
    # The eight pad bytes ("8x") skip the chksum field at offset 148.
    # The field counts as eight spaces instead, which add 8 * 32 = 256.
    spaces = 256
    unsigned_chksum = spaces + sum(struct.unpack("148B8x356B", header))
    signed_chksum = spaces + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000258
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raise IOError if src is exhausted before length bytes were
       copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes, a short read means that src has
        # less data than requested.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    copied = 0
    while copied < full_chunks:
        transfer(BUFSIZE)
        copied += 1

    if tail != 0:
        transfer(tail)
    return
283
# Lookup table for filemode(). There is one group per character of
# the result. Each group is a sequence of (bits, character) pairs,
# the first pair whose bits are all set in the mode wins.
filemode_table = (
    # file type
    (
        (S_IFLNK, "l"),
        (S_IFREG, "-"),
        (S_IFBLK, "b"),
        (S_IFDIR, "d"),
        (S_IFCHR, "c"),
        (S_IFIFO, "p"),
    ),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    (
        (TUEXEC | TSUID, "s"),
        (TSUID, "S"),
        (TUEXEC, "x"),
    ),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    (
        (TGEXEC | TSGID, "s"),
        (TSGID, "S"),
        (TGEXEC, "x"),
    ),

    # others
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    (
        (TOEXEC | TSVTX, "t"),
        (TSVTX, "T"),
        (TOEXEC, "x"),
    ),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000310
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for group in filemode_table:
        # "-" is used if none of the group's bit patterns is set.
        symbol = "-"
        for bits, char in group:
            if mode & bits == bits:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000359
360#---------------------------
361# internal stream interface
362#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open(). mode is "r" for reading
           or "w" for writing, any other value raises KeyError.
        """
        if mode == "r":
            flags = os.O_RDONLY
        elif mode == "w":
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        else:
            raise KeyError(mode)
        # O_BINARY does not exist on every platform.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return the result of a single os.read() call for size bytes."""
        return os.read(self.fd, size)

    def write(self, s):
        """Pass the string s to a single os.write() call."""
        os.write(self.fd, s)
386
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name is only used to open the file if fileobj is None and
           as the file name stored in a gzip header. mode is "r" or
           "w". comptype is "tar" (no compression), "gz", "bz2" or
           "*" for detecting the compression of a stream that is
           read. bufsize is the size of the blocks that are read from
           and written to fileobj.

           Raise CompressionError if the module for the requested
           compression is not available.
        """
        # A file object that was passed in belongs to the caller and
        # is never closed by the stream.
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""           # raw data read from / to be written to fileobj
        self.pos = 0            # position in the uncompressed data
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("") & 0xffffffff
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Whatever went wrong, do not leave a file behind that was
            # opened above. The exception is re-raised unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" does not exist if __init__() failed early.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # A negative wbits value makes zlib produce a raw deflate
        # stream, the gzip header and trailer are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        # The file name in the header must be a byte string. Subclasses
        # of unicode are converted as well, they would otherwise turn
        # the write buffer into a unicode object.
        if isinstance(self.name, unicode):
            self.name = self.name.encode("iso-8859-1", "replace")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        # Checksum and position refer to the uncompressed data.
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffff
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.

           Pending data is written to the file object first. A file
           that was opened by the stream itself is closed even if
           writing the pending data fails.
        """
        if self.closed:
            return

        # Mark the stream as closed right away, so that __del__() does
        # not retry the flush after a failure below.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = ""
                if self.comptype == "gz":
                    # The gzip trailer consists of the crc and the size
                    # of the uncompressed data, both as unsigned 32-bit
                    # little-endian numbers. The masks keep the values
                    # in the range that struct accepts for "L".
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Raise ReadError if the data does not start with the gzip
           magic number and CompressionError if the compression
           method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        # Skip modification time, extra flags and operating system.
        self.__read(6)

        if flag & 4:
            # The extra field is part of the uncompressed gzip header.
            # It has to be skipped with the raw __read(), read() would
            # feed it to the decompressor and advance the position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # Skip the null-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Skip the null-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Skip the header crc.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raise StreamError if pos is before the current position.
        """
        distance = pos - self.pos
        if distance < 0:
            raise StreamError("seeking backwards is not allowed")

        # Skip in pieces of at most bufsize bytes.
        while distance > 0:
            step = min(distance, self.bufsize)
            self.read(step)
            distance -= step
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           Fewer bytes are returned if the end of the data is
           reached. Raise ReadError if the decompressor reports an
           IOError.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress raw blocks until enough data is available, the
        # surplus is kept in dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except IOError:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
621
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Read the first block from fileobj and keep it for the
           detection of the compression type.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read in advance, size is ignored.
           Every later call goes directly to the file object's read().
        """
        first_block = self.buf
        # Shadow this method on the instance.
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes at
           the start of the first block.
        """
        head = self.buf
        if head[:3] == "\037\213\010":
            return "gz"
        if head.startswith("BZh") and head[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the underlying file object."""
        self.fileobj.close()
# class StreamProxy
645
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes that are read at once.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing compressed
           data.
        """
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """Set the position to zero and create a fresh (de)compressor.
           In read mode the file object is rewound as well.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = ""

    def read(self, size):
        """Return up to size bytes of decompressed data."""
        chunks = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            data = self.bz2obj.decompress(raw)
            chunks.append(data)
            available += len(data)

        # Hand out the requested part and keep the rest for later.
        decompressed = "".join(chunks)
        result = decompressed[:size]
        self.buf = decompressed[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to the position pos in the decompressed data."""
        # Going backwards is only possible by starting all over.
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the position in the decompressed data."""
        return self.pos

    def write(self, data):
        """Compress data and write the result to the file object."""
        self.pos += len(data)
        self.fileobj.write(self.bz2obj.compress(data))

    def close(self):
        """Write the rest of the compressed data in write mode. The
           file object itself is left open.
        """
        if self.mode == "w":
            self.fileobj.write(self.bz2obj.flush())
# class _BZ2Proxy
707
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708#------------------------
709# Extraction file object
710#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj is the wrapped file object, offset the start of the
           part in it and size the size of the file that is provided.
           sparse is None for a regular file, otherwise an object
           whose find() method returns the section for a position.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.

           Not more than the data left up to the end of the file is
           read. If size is None, all of it is read.
        """
        remaining = self.size - self.position
        if size is None or size > remaining:
            size = remaining

        if self.sparse is None:
            reader = self.readnormal
        else:
            reader = self.readsparse
        return reader(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        start = self.offset + self.position
        self.fileobj.seek(start)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        chunks = []
        while size > 0:
            chunk = self.readsparsesection(size)
            if not chunk:
                break
            chunks.append(chunk)
            size -= len(chunk)
        return "".join(chunks)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return ""

        # Do not read beyond the end of the section.
        section_end = section.offset + section.size
        size = min(size, section_end - self.position)

        if not isinstance(section, _data):
            # Sections without data read as null characters.
            self.position += size
            return NUL * size

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += size
        return self.fileobj.read(size)
#class _FileInFile
785
786
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() reads at once while it looks for the
    # end of a line.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member tarinfo of the archive
           tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read from fileobj but not yet
        # handed out.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Like for regular file objects a negative size means "read
        # everything". It must not be passed on, because a negative
        # count would make the underlying file object read beyond the
        # end of the member.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.

           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read blocks until one contains a newline or the end of
            # the file is reached.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the length of the result,
        # None and negative values mean "no limit".
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           whence is one of os.SEEK_SET, os.SEEK_CUR and os.SEEK_END.
           The resulting position is limited to the range from 0 to
           the size of the file. Raise ValueError if the file is
           closed or whence is invalid.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
914
915#------------------
916# Exported Classes
917#------------------
918class TarInfo(object):
919 """Informational class which holds the details about an
920 archive member given by a tar header block.
921 TarInfo objects are returned by TarFile.getmember(),
922 TarFile.getmembers() and TarFile.gettarinfo() and are
923 usually created internally.
924 """
925
def __init__(self, name=""):
    """Construct a TarInfo object. name is the optional name
       of the member. Every other field starts out with the default
       assigned below.
    """
    # Fields that mirror the tar header block.
    self.name = name            # member name
    self.mode = 0o644           # file permissions
    self.uid = self.gid = 0     # user id, group id
    self.size = 0               # file size
    self.mtime = 0              # modification time
    self.chksum = 0             # header checksum
    self.type = REGTYPE         # member type
    self.linkname = ""          # link name
    self.uname = self.gname = ""        # user name, group name
    self.devmajor = self.devminor = 0   # device major/minor number

    # Positions inside the archive.
    self.offset = 0             # the tar header starts here
    self.offset_data = 0        # the file's data starts here

    self.pax_headers = {}       # pax header information
948
# pax headers call the "name" and "linkname" fields "path" and
# "linkpath"; the two properties below expose them under those names.
def _getpath(self):
    """Return self.name."""
    return self.name

def _setpath(self, name):
    """Store name in self.name."""
    self.name = name

path = property(fget=_getpath, fset=_setpath)

def _getlinkpath(self):
    """Return self.linkname."""
    return self.linkname

def _setlinkpath(self, linkname):
    """Store linkname in self.linkname."""
    self.linkname = linkname

linkpath = property(fget=_getlinkpath, fset=_setlinkpath)
962
def __repr__(self):
    """Return "<ClassName 'member name' at 0x...>" for debugging."""
    class_name = self.__class__.__name__
    address = id(self)
    return "<%s %r at %#x>" % (class_name, self.name, address)
965
def get_info(self, encoding, errors):
    """Return the TarInfo's attributes as a dictionary.

       The dictionary is a fresh copy of the header fields. mode is
       masked with 07777, a directory name gets a trailing slash and
       unicode name/linkname/uname/gname values are encoded with
       encoding and errors.
    """
    copied = ("name", "uid", "gid", "size", "mtime", "chksum", "type",
              "linkname", "uname", "gname", "devmajor", "devminor")
    info = dict((field, getattr(self, field)) for field in copied)
    info["mode"] = self.mode & 0o7777

    # A directory is written with exactly one trailing slash.
    if info["type"] == DIRTYPE and not info["name"].endswith("/"):
        info["name"] += "/"

    # Only exact unicode objects are encoded; everything else is
    # passed through untouched.
    for key in ("name", "linkname", "uname", "gname"):
        text = info[key]
        if type(text) is unicode:
            info[key] = text.encode(encoding, errors)

    return info
993
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
    """Return a tar header as a string of 512 byte blocks.

       format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
       PAX_FORMAT); any other value raises ValueError. encoding and
       errors are handed to get_info() and, for pax, to
       create_pax_header().
    """
    info = self.get_info(encoding, errors)

    if format == USTAR_FORMAT:
        header = self.create_ustar_header(info)
    elif format == GNU_FORMAT:
        header = self.create_gnu_header(info)
    elif format == PAX_FORMAT:
        header = self.create_pax_header(info, encoding, errors)
    else:
        raise ValueError("invalid format")
    return header
1007
def create_ustar_header(self, info):
    """Return the object as a ustar header block.

       info is the dictionary from get_info(); it is modified in
       place. Raises ValueError if the linkname exceeds LENGTH_LINK or
       the name cannot be split into prefix and name.
    """
    info["magic"] = POSIX_MAGIC

    if len(info["linkname"]) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    name = info["name"]
    if len(name) > LENGTH_NAME:
        # Too long for the name field alone: spread it over the
        # prefix and name fields.
        prefix, name = self._posix_split_name(name)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT)
1020
def create_gnu_header(self, info):
    """Return the object as a GNU header block sequence.

       A linkname longer than LENGTH_LINK and a name longer than
       LENGTH_NAME each get their own long header (in that order) in
       front of the regular header block.
    """
    info["magic"] = GNU_MAGIC

    blocks = []
    if len(info["linkname"]) > LENGTH_LINK:
        blocks.append(
            self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK))

    if len(info["name"]) > LENGTH_NAME:
        blocks.append(
            self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME))

    blocks.append(self._create_header(info, GNU_FORMAT))
    return "".join(blocks)
1034
def create_pax_header(self, info, encoding, errors):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplement information.

       info is modified in place. Entries already present in
       self.pax_headers take priority over the values in info.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header when they are not pure
    # ASCII or are longer than the ustar field.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for name, hname, length in string_fields:
        if hname in pax_headers:
            # The pax header has priority.
            continue

        val = info[name].decode(encoding, errors)

        try:
            val.encode("ascii")
        except UnicodeEncodeError:
            needs_record = True
        else:
            needs_record = len(info[name]) > length

        if needs_record:
            pax_headers[hname] = val

    # Number fields go into the pax header when they do not fit the
    # octal field or are floats; the ustar field is then zeroed.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for name, digits in number_fields:
        if name not in pax_headers:
            val = info[name]
            fits = 0 <= val < 8 ** (digits - 1)
            if fits and not isinstance(val, float):
                continue
            pax_headers[name] = unicode(val)
        # Either the pax header already had the field (it has
        # priority) or it was just added: avoid overflow in ustar.
        info[name] = 0

    # Create a pax extended header if necessary.
    if pax_headers:
        prefix_blocks = self._create_pax_generic_header(pax_headers)
    else:
        prefix_blocks = ""

    return prefix_blocks + self._create_header(info, USTAR_FORMAT)
1085
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return the object as a pax global header block sequence.

       pax_headers is passed to _create_pax_generic_header() with the
       XGLTYPE header type.
    """
    sequence = cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
    return sequence
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001091
def _posix_split_name(self, name):
    """Split a name longer than 100 chars into a prefix
       and a name part.

       The split happens at the last "/" found within the first
       LENGTH_PREFIX + 1 characters. Raises ValueError if that leaves
       an empty prefix or a name part longer than LENGTH_NAME.
    """
    # rfind() returns -1 when there is no slash in range, which makes
    # both slices below behave like "no prefix".
    cut = name.rfind("/", 0, LENGTH_PREFIX + 1)
    if cut < 0:
        prefix, rest = "", name
    else:
        prefix, rest = name[:cut], name[cut + 1:]

    if not prefix or len(rest) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, rest
1106
@staticmethod
def _create_header(info, format):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       Missing keys in info fall back to the defaults given below.
       The fields are laid out back to back, so every width matters:
       name (100) + mode (8) + uid (8) + gid (8) + size (12) +
       mtime (12) put the checksum field at offset 148 and the type
       flag at offset 156, which is where frombuf() reads them.
    """
    parts = [
        stn(info.get("name", ""), 100),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum field: a placeholder of exactly eight blanks. A
        # shorter placeholder would shift every following field and
        # make the splice below overwrite the wrong bytes.
        " " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100),
        stn(info.get("magic", POSIX_MAGIC), 8),
        stn(info.get("uname", ""), 32),
        stn(info.get("gname", ""), 32),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155)
    ]

    # Pad the joined fields with NULs up to BLOCKSIZE.
    buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Splice six octal digits plus a NUL over the first seven bytes of
    # the checksum field (offsets 148-154 when BLOCKSIZE is 512); the
    # eighth byte keeps its blank.
    buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
    return buf
1134
@staticmethod
def _create_payload(payload):
    """Return the string payload filled with zero bytes
       up to the next 512 byte border.

       A payload whose length is already a multiple of BLOCKSIZE is
       returned unchanged.
    """
    remainder = len(payload) % BLOCKSIZE
    if remainder == 0:
        return payload
    return payload + NUL * (BLOCKSIZE - remainder)
1144
@classmethod
def _create_gnu_long_header(cls, name, type):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       for name.

       The sequence is one header block named "././@LongLink" followed
       by the NUL-terminated name padded to full blocks.
    """
    name += NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(name),      # includes the terminating NUL
        "magic": GNU_MAGIC,
    }

    # Extended header first, then the name blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(name)
1161
@classmethod
def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
    """Return a POSIX.1-2001 extended or global header sequence
       that contains a list of keyword, value pairs. The values
       must be unicode objects.

       Each pair becomes one "%d %s=%s\\n" record whose leading number
       is the length of the whole record, the number itself included.
    """
    records = []
    for keyword, value in pax_headers.iteritems():
        keyword = keyword.encode("utf8")
        value = value.encode("utf8")
        base = len(keyword) + len(value) + 3    # ' ' + '=' + '\n'

        # The record length counts its own digits, so iterate until
        # adding the digit count no longer changes the total.
        size = 0
        grown = base + len(str(size))
        while grown != size:
            size = grown
            grown = base + len(str(size))

        records.append("%d %s=%s\n" % (size, keyword, value))
    payload = "".join(records)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(payload),
        "magic": POSIX_MAGIC,
    }

    # Pax header block first, then the record blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(payload)
1193
@classmethod
def frombuf(cls, buf):
    """Construct a TarInfo object from a 512 byte string buffer.

       Raises EmptyHeaderError for an empty buffer,
       TruncatedHeaderError for a buffer that is not BLOCKSIZE long,
       EOFHeaderError for a block of NULs only and InvalidHeaderError
       if the stored checksum matches none of the computed ones.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    obj = cls()
    obj.buf = buf
    obj.chksum = chksum
    obj.type = buf[156:157]

    # (attribute, start, stop) of the string fields in the block.
    string_fields = (
        ("name", 0, 100),
        ("linkname", 157, 257),
        ("uname", 265, 297),
        ("gname", 297, 329),
    )
    for attr, start, stop in string_fields:
        setattr(obj, attr, nts(buf[start:stop]))

    # (attribute, start, stop) of the number fields in the block.
    number_fields = (
        ("mode", 100, 108),
        ("uid", 108, 116),
        ("gid", 116, 124),
        ("size", 124, 136),
        ("mtime", 136, 148),
        ("devmajor", 329, 337),
        ("devminor", 337, 345),
    )
    for attr, start, stop in number_fields:
        setattr(obj, attr, nti(buf[start:stop]))

    prefix = nts(buf[345:500])

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1239
@classmethod
def fromtarfile(cls, tarfile):
    """Return the next TarInfo object from TarFile object
       tarfile.

       One block is read from tarfile.fileobj and parsed with
       frombuf(); the result is then handed to _proc_member().
    """
    fileobj = tarfile.fileobj
    obj = cls.frombuf(fileobj.read(BLOCKSIZE))
    # The header block that was just read starts one block back.
    obj.offset = fileobj.tell() - BLOCKSIZE
    return obj._proc_member(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001249
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001250 #--------------------------------------------------------------------------
1251 # The following are methods that are called depending on the type of a
1252 # member. The entry point is _proc_member() which can be overridden in a
1253 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1254 # implement the following
1255 # operations:
1256 # 1. Set self.offset_data to the position where the data blocks begin,
1257 # if there is data that follows.
1258 # 2. Set tarfile.offset to the position where the next member's header will
1259 # begin.
1260 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Choose the right processing method depending on
       the type and call it.

       Returns whatever the selected _proc_*() method returns.
    """
    if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self._proc_gnulong
    elif self.type == GNUTYPE_SPARSE:
        handler = self._proc_sparse
    elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        handler = self._proc_pax
    else:
        handler = self._proc_builtin
    return handler(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001273
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
       will be treated as a regular file.

       Sets self.offset_data and tarfile.offset, applies the global
       pax headers saved on tarfile and returns self.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    next_header = data_start
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        next_header += self._block(self.size)
    tarfile.offset = next_header

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding,
                         tarfile.errors)

    return self
1290
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.

       Returns the TarInfo built from the header that follows, with
       its name or linkname replaced by the long value. Raises
       SubsequentHeaderError if that header cannot be read.
    """
    longvalue = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        following = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information.
    following.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        following.name = nts(longvalue)
    elif self.type == GNUTYPE_LONGLINK:
        following.linkname = nts(longvalue)

    return following
1312
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers.

       Builds the list of data and hole sections in self.sparse, sets
       self.offset_data and tarfile.offset, replaces self.size with
       the original size stored in the header and returns self.
    """
    def structs(block, start, count):
        # Yield (offset, numbytes) for up to count consecutive
        # 24 byte sparse structs beginning at start; stop at the
        # first struct that nti() rejects.
        for index in xrange(count):
            pos = start + 24 * index
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                return
            yield offset, numbytes

    header = self.buf

    # There are 4 possible sparse structs in the
    # first header.
    entries = list(structs(header, 386, 4))
    isextended = ord(header[482])
    origsize = nti(header[483:495])

    # If the isextended flag is given,
    # there are extra headers to process, 21 structs each.
    while isextended == 1:
        extra = tarfile.fileobj.read(BLOCKSIZE)
        entries.extend(structs(extra, 0, 21))
        isextended = ord(extra[504])

    # Turn the structs into alternating hole and data sections.
    sp = _ringbuffer()
    lastpos = realpos = long(0)
    for offset, numbytes in entries:
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes

    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    self.sparse = sp

    self.offset_data = tarfile.fileobj.tell()
    # self.size still holds the size from the header block here.
    tarfile.offset = self.offset_data + self._block(self.size)
    self.size = origsize

    return self
1368
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2001.

       Reads the pax records that follow this header, stores them in
       a header dictionary and returns the TarInfo of the member that
       comes next. For an extended header that member is patched with
       the records. Raises InvalidHeaderError for a record with
       length 0 and SubsequentHeaderError if the following header
       cannot be read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global). A global header updates tarfile.pax_headers itself,
    # an extended header works on a copy.
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like this:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero length would leave pos unchanged and make this
            # loop match the same record forever.
            # NOTE(review): assumes InvalidHeaderError is a
            # HeaderError like the other header errors - confirm.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        following = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        following._apply_pax_info(pax_headers, tarfile.encoding,
                                  tarfile.errors)
        following.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = following.offset_data
            if following.isreg() or following.type not in SUPPORTED_TYPES:
                offset += following._block(following.size)
            tarfile.offset = offset

    return following
1426
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Replace fields with supplemental information from a previous
       pax extended or global header.

       Only keywords listed in PAX_FIELDS are applied. A copy of
       pax_headers is stored in self.pax_headers afterwards.
    """
    for keyword, value in pax_headers.iteritems():
        if keyword in PAX_FIELDS:
            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                convert = PAX_NUMBER_FIELDS[keyword]
                try:
                    value = convert(value)
                except ValueError:
                    # Unparsable numbers fall back to 0.
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001449
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
       e.g. _block(834) => 1024.
    """
    blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partially filled block still occupies a whole one.
        blocks += 1
    return blocks * BLOCKSIZE
Georg Brandl3354f282006-10-29 09:16:12 +00001458
def isreg(self):
    """Return True if the type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES

def isfile(self):
    """Return the result of isreg()."""
    return self.isreg()

def isdir(self):
    """Return True if the type is DIRTYPE."""
    return self.type == DIRTYPE

def issym(self):
    """Return True if the type is SYMTYPE."""
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the type is LNKTYPE."""
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the type is CHRTYPE."""
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the type is BLKTYPE."""
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the type is FIFOTYPE."""
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if the type is GNUTYPE_SPARSE."""
    return self.type == GNUTYPE_SPARSE

def isdev(self):
    """Return True if the type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1479# class TarInfo
1480
1481class TarFile(object):
1482 """The TarFile Class provides an interface to tar archives.
1483 """
1484
1485 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1486
1487 dereference = False # If true, add content of linked file to the
1488 # tar file, else the link.
1489
1490 ignore_zeros = False # If true, skips empty or invalid blocks and
1491 # continues processing.
1492
Lars Gustäbel92ca7562009-12-13 11:32:27 +00001493 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001494 # messages (if debug >= 0). If > 0, errors
1495 # are passed to the caller as exceptions.
1496
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001497 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001498
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001499 encoding = ENCODING # Encoding for 8-bit character strings.
1500
1501 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001502
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001503 tarinfo = TarInfo # The default TarInfo class to use.
1504
1505 fileobject = ExFileObject # The default ExFileObject class to use.
1506
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining arguments override the class attribute of the
       same name when they are not None. Raises ValueError for an
       unknown mode and ReadError if an archive opened for appending
       contains a bad header.
    """
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if fileobj:
        # Use the caller's file object and take over what it can
        # tell about itself.
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes: only arguments that were actually given
    # replace the class level defaults.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attr, given in overrides:
        if given is not None:
            setattr(self, attr, given)

    if errors is not None:
        self.errors = errors
    else:
        # NOTE(review): "utf-8" is stored as the errors value for
        # reading; presumably the string helpers treat it as a
        # special fallback - confirm against uts().
        self.errors = "utf-8" if mode == "r" else "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(
                    self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Whatever went wrong, do not leave a file open that was
        # opened here; then let the exception propagate unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001609
def _getposix(self):
    """Return True if self.format is USTAR_FORMAT."""
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Set self.format to USTAR_FORMAT for a true value, else to
       GNU_FORMAT, after issuing a DeprecationWarning.
    """
    import warnings
    # Stack level 2 attributes the warning to the caller's line.
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT

posix = property(_getposix, _setposix)
1621
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001622 #--------------------------------------------------------------------------
1623 # Below are the classmethods which act as alternate constructors to the
1624 # TarFile class. The open() method is the only one that is needed for
1625 # public use; it is the "super"-constructor and is able to select an
1626 # adequate "sub"-constructor for a particular compression using the mapping
1627 # from OPEN_METH.
1628 #
1629 # This concept allows one to subclass TarFile without losing the comfort of
1630 # the super-constructor. A sub-constructor is registered and made available
1631 # by adding it to the mapping in OPEN_METH.
1632
Guido van Rossum75b64e62005-01-16 00:16:11 +00001633 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001634 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001635 """Open a tar archive for reading, writing or appending. Return
1636 an appropriate TarFile class.
1637
1638 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001639 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001640 'r:' open for reading exclusively uncompressed
1641 'r:gz' open for reading with gzip compression
1642 'r:bz2' open for reading with bzip2 compression
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001643 'a' or 'a:' open for appending, creating the file if necessary
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001644 'w' or 'w:' open for writing without compression
1645 'w:gz' open for writing with gzip compression
1646 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001647
1648 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001649 'r|' open an uncompressed stream of tar blocks for reading
1650 'r|gz' open a gzip compressed stream of tar blocks
1651 'r|bz2' open a bzip2 compressed stream of tar blocks
1652 'w|' open an uncompressed stream for writing
1653 'w|gz' open a gzip compressed stream for writing
1654 'w|bz2' open a bzip2 compressed stream for writing
1655 """
1656
1657 if not name and not fileobj:
Georg Brandle4751e32006-05-18 06:11:19 +00001658 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001659
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001660 if mode in ("r", "r:*"):
1661 # Find out which *open() is appropriate for opening the file.
1662 for comptype in cls.OPEN_METH:
1663 func = getattr(cls, cls.OPEN_METH[comptype])
Lars Gustäbela7ba6fc2006-12-27 10:30:46 +00001664 if fileobj is not None:
1665 saved_pos = fileobj.tell()
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001666 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001667 return func(name, "r", fileobj, **kwargs)
1668 except (ReadError, CompressionError), e:
Lars Gustäbela7ba6fc2006-12-27 10:30:46 +00001669 if fileobj is not None:
1670 fileobj.seek(saved_pos)
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001671 continue
Georg Brandle4751e32006-05-18 06:11:19 +00001672 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001673
1674 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001675 filemode, comptype = mode.split(":", 1)
1676 filemode = filemode or "r"
1677 comptype = comptype or "tar"
1678
1679 # Select the *open() function according to
1680 # given compression.
1681 if comptype in cls.OPEN_METH:
1682 func = getattr(cls, cls.OPEN_METH[comptype])
1683 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001684 raise CompressionError("unknown compression type %r" % comptype)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001685 return func(name, filemode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001686
1687 elif "|" in mode:
1688 filemode, comptype = mode.split("|", 1)
1689 filemode = filemode or "r"
1690 comptype = comptype or "tar"
1691
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001692 if filemode not in ("r", "w"):
Georg Brandle4751e32006-05-18 06:11:19 +00001693 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001694
Benjamin Peterson7fd59e02014-08-27 20:31:21 -04001695 stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1696 try:
1697 t = cls(name, filemode, stream, **kwargs)
1698 except:
1699 stream.close()
1700 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001701 t._extfileobj = False
1702 return t
1703
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001704 elif mode in ("a", "w"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001705 return cls.taropen(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001706
Georg Brandle4751e32006-05-18 06:11:19 +00001707 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001708
Guido van Rossum75b64e62005-01-16 00:16:11 +00001709 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001710 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001711 """Open uncompressed tar archive name for reading or writing.
1712 """
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001713 if mode not in ("r", "a", "w"):
Georg Brandle4751e32006-05-18 06:11:19 +00001714 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001715 return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001716
Guido van Rossum75b64e62005-01-16 00:16:11 +00001717 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001718 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001719 """Open gzip compressed tar archive name for reading or writing.
1720 Appending is not allowed.
1721 """
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001722 if mode not in ("r", "w"):
Georg Brandle4751e32006-05-18 06:11:19 +00001723 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001724
1725 try:
1726 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001727 gzip.GzipFile
1728 except (ImportError, AttributeError):
Georg Brandle4751e32006-05-18 06:11:19 +00001729 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001730
Benjamin Peterson7fd59e02014-08-27 20:31:21 -04001731 try:
1732 fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj)
1733 except OSError:
1734 if fileobj is not None and mode == 'r':
1735 raise ReadError("not a gzip file")
1736 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001737
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001738 try:
Benjamin Peterson7fd59e02014-08-27 20:31:21 -04001739 t = cls.taropen(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001740 except IOError:
Benjamin Peterson7fd59e02014-08-27 20:31:21 -04001741 fileobj.close()
Serhiy Storchaka7a278da2014-01-18 16:14:00 +02001742 if mode == 'r':
1743 raise ReadError("not a gzip file")
1744 raise
Benjamin Peterson7fd59e02014-08-27 20:31:21 -04001745 except:
1746 fileobj.close()
1747 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001748 t._extfileobj = False
1749 return t
1750
Guido van Rossum75b64e62005-01-16 00:16:11 +00001751 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001752 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001753 """Open bzip2 compressed tar archive name for reading or writing.
1754 Appending is not allowed.
1755 """
Serhiy Storchaka75ba21a2014-01-18 15:35:19 +02001756 if mode not in ("r", "w"):
Georg Brandle4751e32006-05-18 06:11:19 +00001757 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001758
1759 try:
1760 import bz2
1761 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +00001762 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001763
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001764 if fileobj is not None:
Georg Brandl49c8f4c2006-05-15 19:30:35 +00001765 fileobj = _BZ2Proxy(fileobj, mode)
1766 else:
1767 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001768
1769 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001770 t = cls.taropen(name, mode, fileobj, **kwargs)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00001771 except (IOError, EOFError):
Benjamin Peterson7fd59e02014-08-27 20:31:21 -04001772 fileobj.close()
Serhiy Storchaka7a278da2014-01-18 16:14:00 +02001773 if mode == 'r':
1774 raise ReadError("not a bzip2 file")
1775 raise
Benjamin Peterson7fd59e02014-08-27 20:31:21 -04001776 except:
1777 fileobj.close()
1778 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001779 t._extfileobj = False
1780 return t
1781
    # All *open() methods are registered here: the key is the compression
    # type as it appears in the mode string, the value is the name of the
    # classmethod that open() looks up with getattr() for that type.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1788
1789 #--------------------------------------------------------------------------
1790 # The public methods which TarFile provides:
1791
1792 def close(self):
1793 """Close the TarFile. In write-mode, two finishing zero blocks are
1794 appended to the archive.
1795 """
1796 if self.closed:
1797 return
1798
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001799 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001800 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1801 self.offset += (BLOCKSIZE * 2)
1802 # fill up the end with zero-blocks
1803 # (like option -b20 for tar does)
1804 blocks, remainder = divmod(self.offset, RECORDSIZE)
1805 if remainder > 0:
1806 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1807
1808 if not self._extfileobj:
1809 self.fileobj.close()
1810 self.closed = True
1811
1812 def getmember(self, name):
1813 """Return a TarInfo object for member `name'. If `name' can not be
1814 found in the archive, KeyError is raised. If a member occurs more
Mark Dickinson3e4caeb2009-02-21 20:27:01 +00001815 than once in the archive, its last occurrence is assumed to be the
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001816 most up-to-date version.
1817 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001818 tarinfo = self._getmember(name)
1819 if tarinfo is None:
Georg Brandle4751e32006-05-18 06:11:19 +00001820 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001821 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001822
1823 def getmembers(self):
1824 """Return the members of the archive as a list of TarInfo objects. The
1825 list has the same order as the members in the archive.
1826 """
1827 self._check()
1828 if not self._loaded: # if we want to obtain a list of
1829 self._load() # all members, we first have to
1830 # scan the whole archive.
1831 return self.members
1832
1833 def getnames(self):
1834 """Return the members of the archive as a list of their names. It has
1835 the same order as the list returned by getmembers().
1836 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001837 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001838
1839 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1840 """Create a TarInfo object for either the file `name' or the file
1841 object `fileobj' (using os.fstat on its file descriptor). You can
1842 modify some of the TarInfo's attributes before you add it using
1843 addfile(). If given, `arcname' specifies an alternative name for the
1844 file in the archive.
1845 """
1846 self._check("aw")
1847
1848 # When fileobj is given, replace name by
1849 # fileobj's real name.
1850 if fileobj is not None:
1851 name = fileobj.name
1852
1853 # Building the name of the member in the archive.
1854 # Backward slashes are converted to forward slashes,
1855 # Absolute paths are turned to relative paths.
1856 if arcname is None:
1857 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001858 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001859 arcname = arcname.replace(os.sep, "/")
1860 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001861
1862 # Now, fill the TarInfo object with
1863 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001864 tarinfo = self.tarinfo()
1865 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001866
1867 # Use os.stat or os.lstat, depending on platform
1868 # and if symlinks shall be resolved.
1869 if fileobj is None:
1870 if hasattr(os, "lstat") and not self.dereference:
1871 statres = os.lstat(name)
1872 else:
1873 statres = os.stat(name)
1874 else:
1875 statres = os.fstat(fileobj.fileno())
1876 linkname = ""
1877
1878 stmd = statres.st_mode
1879 if stat.S_ISREG(stmd):
1880 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001881 if not self.dereference and statres.st_nlink > 1 and \
1882 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001883 # Is it a hardlink to an already
1884 # archived file?
1885 type = LNKTYPE
1886 linkname = self.inodes[inode]
1887 else:
1888 # The inode is added only if its valid.
1889 # For win32 it is always 0.
1890 type = REGTYPE
1891 if inode[0]:
1892 self.inodes[inode] = arcname
1893 elif stat.S_ISDIR(stmd):
1894 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001895 elif stat.S_ISFIFO(stmd):
1896 type = FIFOTYPE
1897 elif stat.S_ISLNK(stmd):
1898 type = SYMTYPE
1899 linkname = os.readlink(name)
1900 elif stat.S_ISCHR(stmd):
1901 type = CHRTYPE
1902 elif stat.S_ISBLK(stmd):
1903 type = BLKTYPE
1904 else:
1905 return None
1906
1907 # Fill the TarInfo object with all
1908 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001909 tarinfo.name = arcname
1910 tarinfo.mode = stmd
1911 tarinfo.uid = statres.st_uid
1912 tarinfo.gid = statres.st_gid
Lars Gustäbel2ee9c6f2010-06-03 09:56:22 +00001913 if type == REGTYPE:
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001914 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001915 else:
1916 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001917 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001918 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001919 tarinfo.linkname = linkname
1920 if pwd:
1921 try:
1922 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1923 except KeyError:
1924 pass
1925 if grp:
1926 try:
1927 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1928 except KeyError:
1929 pass
1930
1931 if type in (CHRTYPE, BLKTYPE):
1932 if hasattr(os, "major") and hasattr(os, "minor"):
1933 tarinfo.devmajor = os.major(statres.st_rdev)
1934 tarinfo.devminor = os.minor(statres.st_rdev)
1935 return tarinfo
1936
1937 def list(self, verbose=True):
1938 """Print a table of contents to sys.stdout. If `verbose' is False, only
1939 the names of the members are printed. If it is True, an `ls -l'-like
1940 output is produced.
1941 """
1942 self._check()
1943
1944 for tarinfo in self:
1945 if verbose:
1946 print filemode(tarinfo.mode),
1947 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1948 tarinfo.gname or tarinfo.gid),
1949 if tarinfo.ischr() or tarinfo.isblk():
1950 print "%10s" % ("%d,%d" \
1951 % (tarinfo.devmajor, tarinfo.devminor)),
1952 else:
1953 print "%10d" % tarinfo.size,
1954 print "%d-%02d-%02d %02d:%02d:%02d" \
1955 % time.localtime(tarinfo.mtime)[:6],
1956
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001957 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001958
1959 if verbose:
1960 if tarinfo.issym():
1961 print "->", tarinfo.linkname,
1962 if tarinfo.islnk():
1963 print "link to", tarinfo.linkname,
1964 print
1965
Lars Gustäbel21121e62009-09-12 10:28:15 +00001966 def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001967 """Add the file `name' to the archive. `name' may be any type of file
1968 (directory, fifo, symbolic link, etc.). If given, `arcname'
1969 specifies an alternative name for the file in the archive.
1970 Directories are added recursively by default. This can be avoided by
Lars Gustäbel104490e2007-06-18 11:42:11 +00001971 setting `recursive' to False. `exclude' is a function that should
Lars Gustäbel21121e62009-09-12 10:28:15 +00001972 return True for each filename to be excluded. `filter' is a function
1973 that expects a TarInfo object argument and returns the changed
1974 TarInfo object, if it returns None the TarInfo object will be
1975 excluded from the archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001976 """
1977 self._check("aw")
1978
1979 if arcname is None:
1980 arcname = name
1981
Lars Gustäbel104490e2007-06-18 11:42:11 +00001982 # Exclude pathnames.
Lars Gustäbel21121e62009-09-12 10:28:15 +00001983 if exclude is not None:
1984 import warnings
1985 warnings.warn("use the filter argument instead",
1986 DeprecationWarning, 2)
1987 if exclude(name):
1988 self._dbg(2, "tarfile: Excluded %r" % name)
1989 return
Lars Gustäbel104490e2007-06-18 11:42:11 +00001990
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001991 # Skip if somebody tries to archive the archive...
Lars Gustäbela4b23812006-12-23 17:57:23 +00001992 if self.name is not None and os.path.abspath(name) == self.name:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001993 self._dbg(2, "tarfile: Skipped %r" % name)
1994 return
1995
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001996 self._dbg(1, name)
1997
1998 # Create a TarInfo object from the file.
1999 tarinfo = self.gettarinfo(name, arcname)
2000
2001 if tarinfo is None:
2002 self._dbg(1, "tarfile: Unsupported type %r" % name)
2003 return
2004
Lars Gustäbel21121e62009-09-12 10:28:15 +00002005 # Change or exclude the TarInfo object.
2006 if filter is not None:
2007 tarinfo = filter(tarinfo)
2008 if tarinfo is None:
2009 self._dbg(2, "tarfile: Excluded %r" % name)
2010 return
2011
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002012 # Append the tar header and data to the archive.
2013 if tarinfo.isreg():
Andrew Svetlovac26a2e2012-11-29 14:22:26 +02002014 with bltn_open(name, "rb") as f:
2015 self.addfile(tarinfo, f)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002016
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00002017 elif tarinfo.isdir():
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002018 self.addfile(tarinfo)
2019 if recursive:
2020 for f in os.listdir(name):
Lars Gustäbel21121e62009-09-12 10:28:15 +00002021 self.add(os.path.join(name, f), os.path.join(arcname, f),
2022 recursive, exclude, filter)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002023
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00002024 else:
2025 self.addfile(tarinfo)
2026
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002027 def addfile(self, tarinfo, fileobj=None):
2028 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2029 given, tarinfo.size bytes are read from it and added to the archive.
2030 You can create TarInfo objects using gettarinfo().
2031 On Windows platforms, `fileobj' should always be opened with mode
2032 'rb' to avoid irritation about the file size.
2033 """
2034 self._check("aw")
2035
Georg Brandl3354f282006-10-29 09:16:12 +00002036 tarinfo = copy.copy(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002037
Lars Gustäbela0fcb932007-05-27 19:49:30 +00002038 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
Georg Brandl3354f282006-10-29 09:16:12 +00002039 self.fileobj.write(buf)
2040 self.offset += len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002041
2042 # If there's data to follow, append it.
2043 if fileobj is not None:
2044 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2045 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2046 if remainder > 0:
2047 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2048 blocks += 1
2049 self.offset += blocks * BLOCKSIZE
2050
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002051 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002052
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002053 def extractall(self, path=".", members=None):
2054 """Extract all members from the archive to the current working
2055 directory and set owner, modification time and permissions on
2056 directories afterwards. `path' specifies a different directory
2057 to extract to. `members' is optional and must be a subset of the
2058 list returned by getmembers().
2059 """
2060 directories = []
2061
2062 if members is None:
2063 members = self
2064
2065 for tarinfo in members:
2066 if tarinfo.isdir():
Lars Gustäbel0192e432008-02-05 11:51:40 +00002067 # Extract directories with a safe mode.
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002068 directories.append(tarinfo)
Lars Gustäbel0192e432008-02-05 11:51:40 +00002069 tarinfo = copy.copy(tarinfo)
2070 tarinfo.mode = 0700
2071 self.extract(tarinfo, path)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002072
2073 # Reverse sort directories.
Brett Cannon132fc542008-08-04 21:23:07 +00002074 directories.sort(key=operator.attrgetter('name'))
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002075 directories.reverse()
2076
2077 # Set correct owner, mtime and filemode on directories.
2078 for tarinfo in directories:
Lars Gustäbel2ee1c762008-01-04 14:00:33 +00002079 dirpath = os.path.join(path, tarinfo.name)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002080 try:
Lars Gustäbel2ee1c762008-01-04 14:00:33 +00002081 self.chown(tarinfo, dirpath)
2082 self.utime(tarinfo, dirpath)
2083 self.chmod(tarinfo, dirpath)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00002084 except ExtractError, e:
2085 if self.errorlevel > 1:
2086 raise
2087 else:
2088 self._dbg(1, "tarfile: %s" % e)
2089
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002090 def extract(self, member, path=""):
2091 """Extract a member from the archive to the current working directory,
2092 using its full name. Its file information is extracted as accurately
2093 as possible. `member' may be a filename or a TarInfo object. You can
2094 specify a different directory using `path'.
2095 """
2096 self._check("r")
2097
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002098 if isinstance(member, basestring):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002099 tarinfo = self.getmember(member)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002100 else:
2101 tarinfo = member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002102
Neal Norwitza4f651a2004-07-20 22:07:44 +00002103 # Prepare the link target for makelink().
2104 if tarinfo.islnk():
2105 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2106
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002107 try:
2108 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2109 except EnvironmentError, e:
2110 if self.errorlevel > 0:
2111 raise
2112 else:
2113 if e.filename is None:
2114 self._dbg(1, "tarfile: %s" % e.strerror)
2115 else:
2116 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2117 except ExtractError, e:
2118 if self.errorlevel > 1:
2119 raise
2120 else:
2121 self._dbg(1, "tarfile: %s" % e)
2122
2123 def extractfile(self, member):
2124 """Extract a member from the archive as a file object. `member' may be
2125 a filename or a TarInfo object. If `member' is a regular file, a
2126 file-like object is returned. If `member' is a link, a file-like
2127 object is constructed from the link's target. If `member' is none of
2128 the above, None is returned.
2129 The file-like object is read-only and provides the following
2130 methods: read(), readline(), readlines(), seek() and tell()
2131 """
2132 self._check("r")
2133
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002134 if isinstance(member, basestring):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002135 tarinfo = self.getmember(member)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002136 else:
2137 tarinfo = member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002138
2139 if tarinfo.isreg():
2140 return self.fileobject(self, tarinfo)
2141
2142 elif tarinfo.type not in SUPPORTED_TYPES:
2143 # If a member's type is unknown, it is treated as a
2144 # regular file.
2145 return self.fileobject(self, tarinfo)
2146
2147 elif tarinfo.islnk() or tarinfo.issym():
2148 if isinstance(self.fileobj, _Stream):
2149 # A small but ugly workaround for the case that someone tries
2150 # to extract a (sym)link as a file-object from a non-seekable
2151 # stream of tar blocks.
Georg Brandle4751e32006-05-18 06:11:19 +00002152 raise StreamError("cannot extract (sym)link as file object")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002153 else:
Georg Brandl7eb4b7d2005-07-22 21:49:32 +00002154 # A (sym)link's file object is its target's file object.
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002155 return self.extractfile(self._find_link_target(tarinfo))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002156 else:
2157 # If there's no data associated with the member (directory, chrdev,
2158 # blkdev, etc.), return None instead of a file object.
2159 return None
2160
2161 def _extract_member(self, tarinfo, targetpath):
2162 """Extract the TarInfo object tarinfo to a physical
2163 file called targetpath.
2164 """
2165 # Fetch the TarInfo object for the given name
2166 # and build the destination pathname, replacing
2167 # forward slashes to platform specific separators.
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002168 targetpath = targetpath.rstrip("/")
2169 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002170
2171 # Create all upper directories.
2172 upperdirs = os.path.dirname(targetpath)
2173 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel0192e432008-02-05 11:51:40 +00002174 # Create directories that are not part of the archive with
2175 # default permissions.
Lars Gustäbeld2e22902007-01-23 11:17:33 +00002176 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002177
2178 if tarinfo.islnk() or tarinfo.issym():
2179 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2180 else:
2181 self._dbg(1, tarinfo.name)
2182
2183 if tarinfo.isreg():
2184 self.makefile(tarinfo, targetpath)
2185 elif tarinfo.isdir():
2186 self.makedir(tarinfo, targetpath)
2187 elif tarinfo.isfifo():
2188 self.makefifo(tarinfo, targetpath)
2189 elif tarinfo.ischr() or tarinfo.isblk():
2190 self.makedev(tarinfo, targetpath)
2191 elif tarinfo.islnk() or tarinfo.issym():
2192 self.makelink(tarinfo, targetpath)
2193 elif tarinfo.type not in SUPPORTED_TYPES:
2194 self.makeunknown(tarinfo, targetpath)
2195 else:
2196 self.makefile(tarinfo, targetpath)
2197
2198 self.chown(tarinfo, targetpath)
2199 if not tarinfo.issym():
2200 self.chmod(tarinfo, targetpath)
2201 self.utime(tarinfo, targetpath)
2202
2203 #--------------------------------------------------------------------------
2204 # Below are the different file methods. They are called via
2205 # _extract_member() when extract() is called. They can be replaced in a
2206 # subclass to implement other functionality.
2207
2208 def makedir(self, tarinfo, targetpath):
2209 """Make a directory called targetpath.
2210 """
2211 try:
Lars Gustäbel0192e432008-02-05 11:51:40 +00002212 # Use a safe mode for the directory, the real mode is set
2213 # later in _extract_member().
2214 os.mkdir(targetpath, 0700)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002215 except EnvironmentError, e:
2216 if e.errno != errno.EEXIST:
2217 raise
2218
2219 def makefile(self, tarinfo, targetpath):
2220 """Make a file called targetpath.
2221 """
2222 source = self.extractfile(tarinfo)
Andrew Svetlovac26a2e2012-11-29 14:22:26 +02002223 try:
2224 with bltn_open(targetpath, "wb") as target:
2225 copyfileobj(source, target)
2226 finally:
2227 source.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002228
2229 def makeunknown(self, tarinfo, targetpath):
2230 """Make a file from a TarInfo object with an unknown type
2231 at targetpath.
2232 """
2233 self.makefile(tarinfo, targetpath)
2234 self._dbg(1, "tarfile: Unknown file type %r, " \
2235 "extracted as regular file." % tarinfo.type)
2236
2237 def makefifo(self, tarinfo, targetpath):
2238 """Make a fifo called targetpath.
2239 """
2240 if hasattr(os, "mkfifo"):
2241 os.mkfifo(targetpath)
2242 else:
Georg Brandle4751e32006-05-18 06:11:19 +00002243 raise ExtractError("fifo not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002244
2245 def makedev(self, tarinfo, targetpath):
2246 """Make a character or block device called targetpath.
2247 """
2248 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
Georg Brandle4751e32006-05-18 06:11:19 +00002249 raise ExtractError("special devices not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002250
2251 mode = tarinfo.mode
2252 if tarinfo.isblk():
2253 mode |= stat.S_IFBLK
2254 else:
2255 mode |= stat.S_IFCHR
2256
2257 os.mknod(targetpath, mode,
2258 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2259
2260 def makelink(self, tarinfo, targetpath):
2261 """Make a (symbolic) link called targetpath. If it cannot be created
2262 (platform limitation), we try to make a copy of the referenced file
2263 instead of a link.
2264 """
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002265 if hasattr(os, "symlink") and hasattr(os, "link"):
2266 # For systems that support symbolic and hard links.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002267 if tarinfo.issym():
Senthil Kumaran4dd89ce2011-05-17 10:12:18 +08002268 if os.path.lexists(targetpath):
Senthil Kumaran011525e2011-04-28 15:30:31 +08002269 os.unlink(targetpath)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002270 os.symlink(tarinfo.linkname, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002271 else:
Neal Norwitza4f651a2004-07-20 22:07:44 +00002272 # See extract().
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002273 if os.path.exists(tarinfo._link_target):
Senthil Kumaran4dd89ce2011-05-17 10:12:18 +08002274 if os.path.lexists(targetpath):
2275 os.unlink(targetpath)
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002276 os.link(tarinfo._link_target, targetpath)
2277 else:
2278 self._extract_member(self._find_link_target(tarinfo), targetpath)
2279 else:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002280 try:
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002281 self._extract_member(self._find_link_target(tarinfo), targetpath)
2282 except KeyError:
2283 raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002284
2285 def chown(self, tarinfo, targetpath):
2286 """Set owner of targetpath according to tarinfo.
2287 """
2288 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2289 # We have to be root to do so.
2290 try:
2291 g = grp.getgrnam(tarinfo.gname)[2]
2292 except KeyError:
Lars Gustäbel8babfdf2011-09-05 17:04:18 +02002293 g = tarinfo.gid
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002294 try:
2295 u = pwd.getpwnam(tarinfo.uname)[2]
2296 except KeyError:
Lars Gustäbel8babfdf2011-09-05 17:04:18 +02002297 u = tarinfo.uid
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002298 try:
2299 if tarinfo.issym() and hasattr(os, "lchown"):
2300 os.lchown(targetpath, u, g)
2301 else:
Andrew MacIntyre7970d202003-02-19 12:51:34 +00002302 if sys.platform != "os2emx":
2303 os.chown(targetpath, u, g)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002304 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002305 raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002306
2307 def chmod(self, tarinfo, targetpath):
2308 """Set file permissions of targetpath according to tarinfo.
2309 """
Jack Jansen834eff62003-03-07 12:47:06 +00002310 if hasattr(os, 'chmod'):
2311 try:
2312 os.chmod(targetpath, tarinfo.mode)
2313 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002314 raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002315
2316 def utime(self, tarinfo, targetpath):
2317 """Set modification time of targetpath according to tarinfo.
2318 """
Jack Jansen834eff62003-03-07 12:47:06 +00002319 if not hasattr(os, 'utime'):
Tim Petersf9347782003-03-07 15:36:41 +00002320 return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002321 try:
2322 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2323 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00002324 raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002325
2326 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002327 def next(self):
2328 """Return the next member of the archive as a TarInfo object, when
2329 TarFile is opened for reading. Return None if there is no more
2330 available.
2331 """
2332 self._check("ra")
2333 if self.firstmember is not None:
2334 m = self.firstmember
2335 self.firstmember = None
2336 return m
2337
2338 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002339 self.fileobj.seek(self.offset)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002340 tarinfo = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002341 while True:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002342 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002343 tarinfo = self.tarinfo.fromtarfile(self)
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002344 except EOFHeaderError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002345 if self.ignore_zeros:
Georg Brandlebbeed72006-12-19 22:06:46 +00002346 self._dbg(2, "0x%X: %s" % (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002347 self.offset += BLOCKSIZE
2348 continue
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002349 except InvalidHeaderError, e:
2350 if self.ignore_zeros:
2351 self._dbg(2, "0x%X: %s" % (self.offset, e))
2352 self.offset += BLOCKSIZE
2353 continue
2354 elif self.offset == 0:
2355 raise ReadError(str(e))
2356 except EmptyHeaderError:
2357 if self.offset == 0:
2358 raise ReadError("empty file")
2359 except TruncatedHeaderError, e:
2360 if self.offset == 0:
2361 raise ReadError(str(e))
2362 except SubsequentHeaderError, e:
2363 raise ReadError(str(e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002364 break
2365
Lars Gustäbeldd866d52009-11-22 18:30:53 +00002366 if tarinfo is not None:
2367 self.members.append(tarinfo)
2368 else:
2369 self._loaded = True
2370
Georg Brandl38c6a222006-05-10 16:26:03 +00002371 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002372
2373 #--------------------------------------------------------------------------
2374 # Little helper methods:
2375
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002376 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002377 """Find an archive member by name from bottom to top.
2378 If tarinfo is given, it is used as the starting point.
2379 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002380 # Ensure that all members have been loaded.
2381 members = self.getmembers()
2382
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002383 # Limit the member search list up to tarinfo.
2384 if tarinfo is not None:
2385 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002386
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002387 if normalize:
2388 name = os.path.normpath(name)
2389
2390 for member in reversed(members):
2391 if normalize:
2392 member_name = os.path.normpath(member.name)
2393 else:
2394 member_name = member.name
2395
2396 if name == member_name:
2397 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002398
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002399 def _load(self):
2400 """Read through the entire archive file and look for readable
2401 members.
2402 """
2403 while True:
2404 tarinfo = self.next()
2405 if tarinfo is None:
2406 break
2407 self._loaded = True
2408
2409 def _check(self, mode=None):
2410 """Check if TarFile is still open, and if the operation's mode
2411 corresponds to TarFile's mode.
2412 """
2413 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002414 raise IOError("%s is closed" % self.__class__.__name__)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002415 if mode is not None and self.mode not in mode:
2416 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002417
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002418 def _find_link_target(self, tarinfo):
2419 """Find the target member of a symlink or hardlink member in the
2420 archive.
2421 """
2422 if tarinfo.issym():
2423 # Always search the entire archive.
Lars Gustäbel231d4742012-04-24 22:42:08 +02002424 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002425 limit = None
2426 else:
2427 # Search the archive before the link, because a hard link is
2428 # just a reference to an already archived file.
2429 linkname = tarinfo.linkname
2430 limit = tarinfo
2431
2432 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2433 if member is None:
2434 raise KeyError("linkname %r not found" % linkname)
2435 return member
2436
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002437 def __iter__(self):
2438 """Provide an iterator object.
2439 """
2440 if self._loaded:
2441 return iter(self.members)
2442 else:
2443 return TarIter(self)
2444
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002445 def _dbg(self, level, msg):
2446 """Write debugging output to sys.stderr.
2447 """
2448 if level <= self.debug:
2449 print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002450
2451 def __enter__(self):
2452 self._check()
2453 return self
2454
2455 def __exit__(self, type, value, traceback):
2456 if type is None:
2457 self.close()
2458 else:
2459 # An exception occurred. We must not call close() because
2460 # it would try to write end-of-archive blocks and padding.
2461 if not self._extfileobj:
2462 self.fileobj.close()
2463 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002464# class TarFile
2465
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create a TarIter for tarfile, starting at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """A TarIter is its own iterator.
        """
        return self

    def next(self):
        """Return the member at the current position and advance.
           Members are taken from the TarFile's member list when they
           are already there and read with TarFile's next() method
           otherwise. When nothing is left, the TarFile is set as
           _loaded and StopIteration is raised.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. That is why
        # an own position is kept instead of relying on next() alone.
        archive = self.tarfile
        position = self.index

        if position == 0 and archive.firstmember is not None:
            member = archive.next()
        elif position < len(archive.members):
            member = archive.members[position]
        elif archive._loaded:
            raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration

        self.index = position + 1
        return member
2503
2504# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a stretch of a sparse file
       described by its offset and its size.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # True for offsets from self.offset up to, but not including,
        # self.offset + self.size.
        start = self.offset
        return start <= offset < start + self.size
2513
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it stores a realpos value.
    """
    def __init__(self, offset, size, realpos):
        # Offset and size are handled by the base class.
        _section.__init__(self, offset, size)
        self.realpos = realpos
2520
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       _section and only serves to tell holes and data apart.
    """
2525
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       The buffer remembers the index of the item that was found last
       and starts the next search from there, wrapping around at the
       end of the list.
    """
    def __init__(self):
        list.__init__(self)
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains offset (tested with the
           `in' operator), searching from the last hit and wrapping
           around once. Return None if no item contains offset; an
           empty buffer contains nothing, so it yields None as well.
        """
        if not self:
            # Nothing to search; indexing would raise IndexError.
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File: every item was visited without a hit.
                return None
        self.idx = idx
        return item
2546
2547#---------------------------------------------
2548# zipfile compatible TarFile class
2549#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED). Any other compression value
           raises ValueError. When opened for reading, every member
           gets the additional attributes filename, file_size and
           date_time.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            # Attach the ZipInfo-style attribute names to each member.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [info.name for info in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive contents using TarFile's list() method.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, taking the member name
           from zinfo.filename and the modification time from
           zinfo.date_time.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        payload = StringIO(bytes)
        self.tarfile.addfile(tinfo, payload)

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2600
2601#--------------------
2602# exported functions
2603#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened with this module's open() and closed again
       right away; a TarError raised on the way means False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2614
# Export TarFile.open as the module-level open() function. From here on
# the name open in this module refers to TarFile.open and not to the
# builtin open(), which is imported at the top of the file as bltn_open.
open = TarFile.open