blob: adf91d53823bb5a87ee682cc2518090db4f4deb6 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001# -*- coding: iso-8859-1 -*-
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Senthil Kumaran4af1c6a2011-07-28 22:30:27 +080032__version__ = "$Revision: 85213 $"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000033# $Source$
34
Lars Gustäbelc64e4022007-03-13 10:47:19 +000035version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000036__author__ = "Lars Gustäbel (lars@gustaebel.de)"
37__date__ = "$Date$"
38__cvsid__ = "$Id$"
39__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
40
41#---------
42# Imports
43#---------
Serhiy Storchaka205408d2015-03-11 17:31:59 +020044from __builtin__ import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000053import re
Brett Cannon132fc542008-08-04 21:23:07 +000054import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055
56try:
57 import grp, pwd
58except ImportError:
59 grp = pwd = None
60
61# from tarfile import *
62__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
63
64#---------------------------------------------------------
65# tar constants
66#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# The magic and version fields of a header occupy 8 bytes in total, so
# both magic strings below must be exactly 8 characters long.  The GNU
# variant is "ustar" followed by two spaces and a null byte.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to the callable
# used to convert its textual value.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
132
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000133#---------------------------------------------------------
134# Bits used in the mode field, values in octal.
135#---------------------------------------------------------
# File-type bits of the mode field.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special permission bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Permission bits for owner, group and other.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
156
157#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000158# initialization
159#---------------------------------------------------------
# Prefer the filesystem encoding; fall back to the interpreter's default
# encoding when the filesystem encoding is reported as None.
ENCODING = sys.getfilesystemencoding()
ENCODING = sys.getdefaultencoding() if ENCODING is None else ENCODING
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000163
164#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000165# Some useful functions
166#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000167
def stn(s, length):
    """Return s as a fixed-width field of exactly `length` characters.

    Longer strings are truncated; shorter ones are padded on the right
    with null characters.
    """
    # A negative repeat count yields an empty string, so no padding is
    # added when s is already long enough.
    padding = NUL * (length - len(s))
    return s[:length] + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000172
def nts(s):
    """Return the part of a field that precedes its first null char.

    A field without any null character is returned unchanged.
    """
    end = s.find("\0")
    return s if end == -1 else s[:end]
181
def nti(s):
    """Decode a header number field into a python number.

    Two encodings are understood (see itn()): a leading 0200 byte
    marks a big-endian binary number stored in the remaining bytes;
    anything else is parsed as octal digits up to the first null
    character.  An empty or blank octal field decodes to 0.

    Raises InvalidHeaderError if the octal text cannot be parsed.
    """
    if s[0] == chr(0o200):
        # Binary form: fold the bytes after the marker, most
        # significant first.  Start from a long so the result type
        # does not depend on the magnitude of the value.
        value = long(0)
        for char in s[1:]:
            value = (value << 8) + ord(char)
        return value

    try:
        return int(nts(s).strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
198
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode a python number as a header number field.

    Values in the range 0 <= n < 8**(digits-1) are written the
    POSIX 1003.1-1988 way: digits-1 zero-padded octal digits and a
    terminating null byte.  Other values are only representable in
    GNU format, where a leading 0200 byte is followed by digits-1
    bytes holding the number in big-endian order.

    Raises ValueError if the value does not fit, or if the binary
    form would be needed but format is not GNU_FORMAT.
    """
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Peel off the least significant byte first, then reverse so the
    # most significant byte ends up directly after the marker.
    octets = []
    while len(octets) < width:
        octets.append(chr(n & 0o377))
        n >>= 8
    octets.reverse()
    return chr(0o200) + "".join(octets)
225
def uts(s, encoding, errors):
    """Encode the unicode object s with the given encoding.

    errors is handed to s.encode() as the error handler, except for
    the special value "utf-8" (similar to the -o invalid=UTF-8 option
    in POSIX.1-2001): in that case every character that encoding
    cannot represent is replaced by its UTF-8 representation.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # At least one character is untranslatable: encode one character
    # at a time and fall back to UTF-8 only where necessary.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
245
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a header block.

    All bytes of the first 512 are summed, except that the 8-byte
    chksum field at offset 148 is counted as if it held spaces
    (8 * 0x20 = 256).  According to the GNU tar sources, some tars
    (Sun and NeXT) sum the bytes as signed chars, which gives a
    different result when bytes with the high bit set are present,
    hence both variants are computed.
    """
    block = buf[:512]
    # "8x" skips the chksum field itself.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", block))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", block))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000258
def copyfileobj(src, dst, length=None):
    """Copy data from fileobj src to fileobj dst.

    With length None everything up to the end of src is copied.
    Otherwise exactly length bytes are copied and IOError is raised
    if src runs out of data before that.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move one chunk; a short read means src ended prematurely.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    full_chunks, tail = divmod(length, BUFSIZE)
    copied = 0
    while copied < full_chunks:
        transfer(BUFSIZE)
        copied += 1

    if tail != 0:
        transfer(tail)
    return
283
# One entry per character of the mode string.  Each entry lists
# (bit mask, character) candidates in priority order; the first
# candidate whose bits are all set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"), (S_IFREG, "-"), (S_IFBLK, "b"),
     (S_IFDIR, "d"), (S_IFCHR, "c"), (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"), (TSUID, "S"), (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"), (TSGID, "S"), (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"), (TSVTX, "T"), (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        symbol = "-"        # used when no candidate matches
        for mask, char in candidates:
            if mode & mask == mask:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """General error raised while extracting."""

class ReadError(TarError):
    """Raised for tar archives that cannot be read."""

class CompressionError(TarError):
    """Raised when a compression method is not available."""

class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""

class HeaderError(TarError):
    """Base class of all header related exceptions."""

class EmptyHeaderError(HeaderError):
    """Raised for empty headers."""

class TruncatedHeaderError(HeaderError):
    """Raised for truncated headers."""

class EOFHeaderError(HeaderError):
    """Raised for end of file headers."""

class InvalidHeaderError(HeaderError):
    """Raised for invalid headers."""

class SubsequentHeaderError(HeaderError):
    """Raised for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000359
360#---------------------------
361# internal stream interface
362#---------------------------
363class _LowLevelFile:
364 """Low-level file object. Supports reading and writing.
365 It is used instead of a regular file object for streaming
366 access.
367 """
368
369 def __init__(self, name, mode):
370 mode = {
371 "r": os.O_RDONLY,
372 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
373 }[mode]
374 if hasattr(os, "O_BINARY"):
375 mode |= os.O_BINARY
Lars Gustäbel5c4c4612010-04-29 15:23:38 +0000376 self.fd = os.open(name, mode, 0666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000377
378 def close(self):
379 os.close(self.fd)
380
381 def read(self, size):
382 return os.read(self.fd, size)
383
384 def write(self, s):
385 os.write(self.fd, s)
386
387class _Stream:
388 """Class that serves as an adapter between TarFile and
389 a stream-like object. The stream-like object only
390 needs to have a read() or write() method and is accessed
391 blockwise. Use of gzip or bzip2 compression is possible.
392 A stream-like object could be for example: sys.stdin,
393 sys.stdout, a socket, a tape device etc.
394
395 _Stream is intended to be used only internally.
396 """
397
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000398 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000399 """Construct a _Stream object.
400 """
401 self._extfileobj = True
402 if fileobj is None:
403 fileobj = _LowLevelFile(name, mode)
404 self._extfileobj = False
405
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000406 if comptype == '*':
407 # Enable transparent compression detection for the
408 # stream interface
409 fileobj = _StreamProxy(fileobj)
410 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000411
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000412 self.name = name or ""
413 self.mode = mode
414 self.comptype = comptype
415 self.fileobj = fileobj
416 self.bufsize = bufsize
417 self.buf = ""
418 self.pos = 0L
419 self.closed = False
420
Benjamin Peterson7fd59e02014-08-27 20:31:21 -0400421 try:
422 if comptype == "gz":
423 try:
424 import zlib
425 except ImportError:
426 raise CompressionError("zlib module is not available")
427 self.zlib = zlib
428 self.crc = zlib.crc32("") & 0xffffffffL
429 if mode == "r":
430 self._init_read_gz()
431 else:
432 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000433
Benjamin Peterson7fd59e02014-08-27 20:31:21 -0400434 elif comptype == "bz2":
435 try:
436 import bz2
437 except ImportError:
438 raise CompressionError("bz2 module is not available")
439 if mode == "r":
440 self.dbuf = ""
441 self.cmp = bz2.BZ2Decompressor()
442 else:
443 self.cmp = bz2.BZ2Compressor()
444 except:
445 if not self._extfileobj:
446 self.fileobj.close()
447 self.closed = True
448 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000449
450 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000451 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000452 self.close()
453
454 def _init_write_gz(self):
455 """Initialize for writing with gzip compression.
456 """
457 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
458 -self.zlib.MAX_WBITS,
459 self.zlib.DEF_MEM_LEVEL,
460 0)
461 timestamp = struct.pack("<L", long(time.time()))
462 self.__write("\037\213\010\010%s\002\377" % timestamp)
Lars Gustäbel7d4d0742011-12-21 19:27:50 +0100463 if type(self.name) is unicode:
464 self.name = self.name.encode("iso-8859-1", "replace")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000465 if self.name.endswith(".gz"):
466 self.name = self.name[:-3]
467 self.__write(self.name + NUL)
468
469 def write(self, s):
470 """Write string s to the stream.
471 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000472 if self.comptype == "gz":
Gregory P. Smith88440962008-03-25 06:12:45 +0000473 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000474 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000475 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000476 s = self.cmp.compress(s)
477 self.__write(s)
478
479 def __write(self, s):
480 """Write string s to the stream if a whole new block
481 is ready to be written.
482 """
483 self.buf += s
484 while len(self.buf) > self.bufsize:
485 self.fileobj.write(self.buf[:self.bufsize])
486 self.buf = self.buf[self.bufsize:]
487
488 def close(self):
489 """Close the _Stream object. No operation should be
490 done on it afterwards.
491 """
492 if self.closed:
493 return
494
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000495 self.closed = True
Serhiy Storchaka1aa2c0f2015-04-10 13:24:10 +0300496 try:
497 if self.mode == "w" and self.comptype != "tar":
498 self.buf += self.cmp.flush()
499
500 if self.mode == "w" and self.buf:
501 self.fileobj.write(self.buf)
502 self.buf = ""
503 if self.comptype == "gz":
504 # The native zlib crc is an unsigned 32-bit integer, but
505 # the Python wrapper implicitly casts that to a signed C
506 # long. So, on a 32-bit box self.crc may "look negative",
507 # while the same crc on a 64-bit box may "look positive".
508 # To avoid irksome warnings from the `struct` module, force
509 # it to look positive on all boxes.
510 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
511 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
512 finally:
513 if not self._extfileobj:
514 self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000515
516 def _init_read_gz(self):
517 """Initialize for reading a gzip compressed fileobj.
518 """
519 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
520 self.dbuf = ""
521
522 # taken from gzip.GzipFile with some alterations
523 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000524 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000525 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000526 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000527
528 flag = ord(self.__read(1))
529 self.__read(6)
530
531 if flag & 4:
532 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
533 self.read(xlen)
534 if flag & 8:
535 while True:
536 s = self.__read(1)
537 if not s or s == NUL:
538 break
539 if flag & 16:
540 while True:
541 s = self.__read(1)
542 if not s or s == NUL:
543 break
544 if flag & 2:
545 self.__read(2)
546
547 def tell(self):
548 """Return the stream's file pointer position.
549 """
550 return self.pos
551
552 def seek(self, pos=0):
553 """Set the stream's file pointer to pos. Negative seeking
554 is forbidden.
555 """
556 if pos - self.pos >= 0:
557 blocks, remainder = divmod(pos - self.pos, self.bufsize)
558 for i in xrange(blocks):
559 self.read(self.bufsize)
560 self.read(remainder)
561 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000562 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000563 return self.pos
564
565 def read(self, size=None):
566 """Return the next size number of bytes from the stream.
567 If size is not defined, return all bytes of the stream
568 up to EOF.
569 """
570 if size is None:
571 t = []
572 while True:
573 buf = self._read(self.bufsize)
574 if not buf:
575 break
576 t.append(buf)
577 buf = "".join(t)
578 else:
579 buf = self._read(size)
580 self.pos += len(buf)
581 return buf
582
583 def _read(self, size):
584 """Return size bytes from the stream.
585 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000586 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000587 return self.__read(size)
588
589 c = len(self.dbuf)
590 t = [self.dbuf]
591 while c < size:
592 buf = self.__read(self.bufsize)
593 if not buf:
594 break
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000595 try:
596 buf = self.cmp.decompress(buf)
597 except IOError:
598 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000599 t.append(buf)
600 c += len(buf)
601 t = "".join(t)
602 self.dbuf = t[size:]
603 return t[:size]
604
605 def __read(self, size):
606 """Return size bytes from stream. If internal buffer is empty,
607 read another block from the stream.
608 """
609 c = len(self.buf)
610 t = [self.buf]
611 while c < size:
612 buf = self.fileobj.read(self.bufsize)
613 if not buf:
614 break
615 t.append(buf)
616 c += len(buf)
617 t = "".join(t)
618 self.buf = t[size:]
619 return t[:size]
620# class _Stream
621
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000622class _StreamProxy(object):
623 """Small proxy class that enables transparent compression
624 detection for the Stream interface (mode 'r|*').
625 """
626
627 def __init__(self, fileobj):
628 self.fileobj = fileobj
629 self.buf = self.fileobj.read(BLOCKSIZE)
630
631 def read(self, size):
632 self.read = self.fileobj.read
633 return self.buf
634
635 def getcomptype(self):
636 if self.buf.startswith("\037\213\010"):
637 return "gz"
Lars Gustäbel9a388632011-12-06 13:07:09 +0100638 if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000639 return "bz2"
640 return "tar"
641
642 def close(self):
643 self.fileobj.close()
644# class StreamProxy
645
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000646class _BZ2Proxy(object):
647 """Small proxy class that enables external file object
648 support for "r:bz2" and "w:bz2" modes. This is actually
649 a workaround for a limitation in bz2 module's BZ2File
650 class which (unlike gzip.GzipFile) has no support for
651 a file object argument.
652 """
653
654 blocksize = 16 * 1024
655
656 def __init__(self, fileobj, mode):
657 self.fileobj = fileobj
658 self.mode = mode
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000659 self.name = getattr(self.fileobj, "name", None)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000660 self.init()
661
662 def init(self):
663 import bz2
664 self.pos = 0
665 if self.mode == "r":
666 self.bz2obj = bz2.BZ2Decompressor()
667 self.fileobj.seek(0)
668 self.buf = ""
669 else:
670 self.bz2obj = bz2.BZ2Compressor()
671
672 def read(self, size):
673 b = [self.buf]
674 x = len(self.buf)
675 while x < size:
Lars Gustäbel2020a592009-03-22 20:09:33 +0000676 raw = self.fileobj.read(self.blocksize)
677 if not raw:
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000678 break
Lars Gustäbel2020a592009-03-22 20:09:33 +0000679 data = self.bz2obj.decompress(raw)
680 b.append(data)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000681 x += len(data)
682 self.buf = "".join(b)
683
684 buf = self.buf[:size]
685 self.buf = self.buf[size:]
686 self.pos += len(buf)
687 return buf
688
689 def seek(self, pos):
690 if pos < self.pos:
691 self.init()
692 self.read(pos - self.pos)
693
694 def tell(self):
695 return self.pos
696
697 def write(self, data):
698 self.pos += len(data)
699 raw = self.bz2obj.compress(data)
700 self.fileobj.write(raw)
701
702 def close(self):
703 if self.mode == "w":
704 raw = self.bz2obj.flush()
705 self.fileobj.write(raw)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000706# class _BZ2Proxy
707
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708#------------------------
709# Extraction file object
710#------------------------
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000711class _FileInFile(object):
712 """A thin wrapper around an existing file object that
713 provides a part of its data as an individual file
714 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000715 """
716
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000717 def __init__(self, fileobj, offset, size, sparse=None):
718 self.fileobj = fileobj
719 self.offset = offset
720 self.size = size
721 self.sparse = sparse
722 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000723
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000724 def tell(self):
725 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000726 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000727 return self.position
728
729 def seek(self, position):
730 """Seek to a position in the file.
731 """
732 self.position = position
733
734 def read(self, size=None):
735 """Read data from the file.
736 """
737 if size is None:
738 size = self.size - self.position
739 else:
740 size = min(size, self.size - self.position)
741
742 if self.sparse is None:
743 return self.readnormal(size)
744 else:
745 return self.readsparse(size)
746
Lars Gustäbel518602a2015-07-06 09:23:04 +0200747 def __read(self, size):
748 buf = self.fileobj.read(size)
749 if len(buf) != size:
750 raise ReadError("unexpected end of data")
751 return buf
752
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000753 def readnormal(self, size):
754 """Read operation for regular files.
755 """
756 self.fileobj.seek(self.offset + self.position)
757 self.position += size
Lars Gustäbel518602a2015-07-06 09:23:04 +0200758 return self.__read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000759
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000760 def readsparse(self, size):
761 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000762 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000763 data = []
764 while size > 0:
765 buf = self.readsparsesection(size)
766 if not buf:
767 break
768 size -= len(buf)
769 data.append(buf)
770 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000771
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000772 def readsparsesection(self, size):
773 """Read a single section of a sparse file.
774 """
775 section = self.sparse.find(self.position)
776
777 if section is None:
778 return ""
779
780 size = min(size, section.offset + section.size - self.position)
781
782 if isinstance(section, _data):
783 realpos = section.realpos + self.position - section.offset
784 self.fileobj.seek(self.offset + realpos)
785 self.position += size
Lars Gustäbel518602a2015-07-06 09:23:04 +0200786 return self.__read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000787 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000788 self.position += size
789 return NUL * size
790#class _FileInFile
791
792
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes requested per read while looking for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member tarinfo of tarfile."""
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read ahead but not returned yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Serve read-ahead data first, then go to the file.
        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. A negative size
           or None means no limit.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line.  Comparing with
        # -1 alone would let other negative values (or None) be used
        # as a slice bound and return the wrong amount of data.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position
           is clamped to the range 0..size. Raises ValueError for
           an unknown whence.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000919#class ExFileObject
920
921#------------------
922# Exported Classes
923#------------------
924class TarInfo(object):
925 """Informational class which holds the details about an
926 archive member given by a tar header block.
927 TarInfo objects are returned by TarFile.getmember(),
928 TarFile.getmembers() and TarFile.gettarinfo() and are
929 usually created internally.
930 """
931
def __init__(self, name=""):
    """Initialise a TarInfo with default values for every field.

    name is the optional name of the archive member.
    """
    # Fields that are stored in the header block.
    self.name = name            # member name
    self.mode = 0o644           # file permissions
    self.uid = 0                # user id
    self.gid = 0                # group id
    self.size = 0               # file size
    self.mtime = 0              # modification time
    self.chksum = 0             # header checksum
    self.type = REGTYPE         # member type
    self.linkname = ""          # link name
    self.uname = ""             # user name
    self.gname = ""             # group name
    self.devmajor = 0           # device major number
    self.devminor = 0           # device minor number

    # Positions inside the archive file.
    self.offset = 0             # the tar header starts here
    self.offset_data = 0        # the file's data starts here

    # Supplemental pax header information.
    self.pax_headers = {}
954
# pax headers call the "name" and "linkname" fields "path" and
# "linkpath"; the properties make both spellings available.
def _getpath(self):
    """Return the member name (the pax "path" keyword)."""
    return self.name

def _setpath(self, name):
    """Store name as the member name (the pax "path" keyword)."""
    self.name = name

path = property(fget=_getpath, fset=_setpath)
962
def _getlinkpath(self):
    """Return the link target (the pax "linkpath" keyword)."""
    return self.linkname

def _setlinkpath(self, linkname):
    """Store linkname as the link target (the pax "linkpath" keyword)."""
    self.linkname = linkname

linkpath = property(fget=_getlinkpath, fset=_setlinkpath)
968
def __repr__(self):
    """Return a debug string with class name, member name and id()."""
    cls_name = self.__class__.__name__
    return "<%s %r at %#x>" % (cls_name, self.name, id(self))
971
def get_info(self, encoding, errors):
    """Return the header fields of this TarInfo as a dictionary.

    The mode is reduced to its lowest twelve bits, the name of a
    directory gets a trailing slash, and text fields that are unicode
    objects are encoded using encoding and errors.
    """
    fields = ("name", "mode", "uid", "gid", "size", "mtime", "chksum",
              "type", "linkname", "uname", "gname", "devmajor", "devminor")
    info = dict((field, getattr(self, field)) for field in fields)
    info["mode"] &= 0o7777

    name = info["name"]
    if info["type"] == DIRTYPE and not name.endswith("/"):
        info["name"] = name + "/"

    for key in ("name", "linkname", "uname", "gname"):
        text = info[key]
        if type(text) is unicode:
            info[key] = text.encode(encoding, errors)

    return info
999
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
    """Serialise this TarInfo into a tar header string.

    format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
    PAX_FORMAT); encoding and errors are used to encode unicode text
    fields. Raises ValueError for any other format value.
    """
    info = self.get_info(encoding, errors)

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding, errors)
    raise ValueError("invalid format")
1013
def create_ustar_header(self, info):
    """Build a ustar header block from the info dictionary.

    A name longer than LENGTH_NAME is split into prefix and name.
    Raises ValueError if the linkname exceeds LENGTH_LINK or the
    name cannot be split.
    """
    info["magic"] = POSIX_MAGIC

    if len(info["linkname"]) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    name = info["name"]
    if len(name) > LENGTH_NAME:
        prefix, name = self._posix_split_name(name)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT)
1026
def create_gnu_header(self, info):
    """Build a GNU header block sequence from the info dictionary.

    An over-long linkname or name is emitted first as a
    GNUTYPE_LONGLINK / GNUTYPE_LONGNAME sequence, followed by the
    regular header block.
    """
    info["magic"] = GNU_MAGIC

    long_fields = (
        ("linkname", LENGTH_LINK, GNUTYPE_LONGLINK),
        ("name", LENGTH_NAME, GNUTYPE_LONGNAME),
    )
    chunks = []
    for key, limit, longtype in long_fields:
        if len(info[key]) > limit:
            chunks.append(self._create_gnu_long_header(info[key], longtype))

    chunks.append(self._create_header(info, GNU_FORMAT))
    return "".join(chunks)
1040
def create_pax_header(self, info, encoding, errors):
    """Build a ustar header block, preceded by a pax extended header
    sequence for every field the ustar block cannot represent.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # Text fields go into the pax header when they are not pure ASCII
    # or are longer than the space the ustar field offers.
    text_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, keyword, limit in text_fields:
        if keyword in pax_headers:
            # An explicitly supplied pax header wins.
            continue

        text = info[field].decode(encoding, errors)
        try:
            text.encode("ascii")
        except UnicodeEncodeError:
            needs_record = True
        else:
            needs_record = len(info[field]) > limit

        if needs_record:
            pax_headers[keyword] = text

    # Number fields go into the pax header when they are negative, too
    # large for the octal field, or floats.
    for field, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
        if field in pax_headers:
            # An explicitly supplied pax header wins. Zero the ustar
            # field to avoid an overflow there.
            info[field] = 0
            continue

        number = info[field]
        fits = 0 <= number < 8 ** (digits - 1)
        if not fits or isinstance(number, float):
            pax_headers[field] = unicode(number)
            info[field] = 0

    # Only emit an extended header when there is something to store.
    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers)
    else:
        extended = ""

    return extended + self._create_header(info, USTAR_FORMAT)
1091
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Build a pax global header block sequence from pax_headers.

    Delegates to _create_pax_generic_header() with type XGLTYPE.
    """
    sequence = cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
    return sequence
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001097
def _posix_split_name(self, name):
    """Split an over-long name into a (prefix, name) pair.

    The split happens at the last "/" found within the first
    LENGTH_PREFIX + 1 characters. Raises ValueError if there is no
    usable split point or the remaining name is longer than
    LENGTH_NAME.
    """
    cut = name.rfind("/", 0, LENGTH_PREFIX + 1)

    # Everything after the slash is the name; without a slash
    # (cut == -1) the whole string stays in the name part.
    rest = name[cut + 1:]
    if cut > 0:
        prefix = name[:cut]
    else:
        prefix = ""

    if not prefix or len(rest) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, rest
1112
@staticmethod
def _create_header(info, format):
    """Return a header block. info is a dictionary with file
    information, format must be one of the *_FORMAT constants.

    Missing keys fall back to empty strings, zero, REGTYPE and
    POSIX_MAGIC respectively. The checksum is computed over the block
    with the checksum field filled with spaces and then patched in.
    """
    parts = [
        stn(info.get("name", ""), 100),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # The checksum field is 8 bytes wide: frombuf() reads it from
        # buf[148:156] and the type from buf[156:157]. The placeholder
        # therefore has to be eight spaces, otherwise every following
        # field ends up 7 bytes too early.
        " " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100),
        stn(info.get("magic", POSIX_MAGIC), 8),
        stn(info.get("uname", ""), 32),
        stn(info.get("gname", ""), 32),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155)
    ]

    # struct.pack pads the joined fields with NUL bytes up to BLOCKSIZE.
    buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first seven bytes of the checksum field with six
    # octal digits and a NUL; the eighth byte keeps its space.
    # (The offsets -364/-357 assume BLOCKSIZE is 512 -- TODO confirm.)
    buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
    return buf
1140
@staticmethod
def _create_payload(payload):
    """Pad payload with NUL up to the next multiple of BLOCKSIZE.

    A payload whose length already is a multiple of BLOCKSIZE is
    returned unchanged.
    """
    remainder = len(payload) % BLOCKSIZE
    if remainder:
        payload += NUL * (BLOCKSIZE - remainder)
    return payload
1150
@classmethod
def _create_gnu_long_header(cls, name, type):
    """Build a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for name.

    The sequence is a header block named "././@LongLink" followed by
    the NUL-terminated name, padded to full blocks.
    """
    name += NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(name),
        "magic": GNU_MAGIC,
    }

    # Extended header block first, then the name blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(name)
1167
@classmethod
def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
    """Build a POSIX.1-2001 extended or global header sequence.

    pax_headers maps keywords to values; the values must be unicode
    objects. Each pair becomes one "length keyword=value\\n" record.
    """
    chunks = []
    for keyword, value in pax_headers.iteritems():
        keyword = keyword.encode("utf8")
        value = value.encode("utf8")
        # Everything except the length digits: ' ' + '=' + '\n'.
        base = len(keyword) + len(value) + 3
        # The length field counts its own digits, so iterate until
        # the total no longer changes.
        total = 0
        grown = base + len(str(total))
        while grown != total:
            total = grown
            grown = base + len(str(total))
        chunks.append("%d %s=%s\n" % (total, keyword, value))
    records = "".join(chunks)

    # Like star, use the fixed name "././@PaxHeader" rather than the
    # one POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block first, then the record blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(records)
1199
@classmethod
def frombuf(cls, buf):
    """Build a TarInfo object from a header block string.

    Raises EmptyHeaderError for an empty buf, TruncatedHeaderError if
    its length is not BLOCKSIZE, EOFHeaderError if it consists of NUL
    only and InvalidHeaderError if the checksum does not match.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    obj = cls()
    obj.buf = buf
    obj.name = nts(buf[0:100])
    # Consecutive number fields following the name.
    number_fields = (
        ("mode", 100, 108),
        ("uid", 108, 116),
        ("gid", 116, 124),
        ("size", 124, 136),
        ("mtime", 136, 148),
    )
    for attr, start, stop in number_fields:
        setattr(obj, attr, nti(buf[start:stop]))
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = nts(buf[157:257])
    obj.uname = nts(buf[265:297])
    obj.gname = nts(buf[297:329])
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500])

    # The old V7 tar format stores a directory as a regular file
    # whose name ends with a slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # Directory names are kept without trailing slashes.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Put a ustar long name back together.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1245
@classmethod
def fromtarfile(cls, tarfile):
    """Read the next header from the TarFile object tarfile and
    return the TarInfo object that results from processing it.
    """
    fileobj = tarfile.fileobj
    header = fileobj.read(BLOCKSIZE)
    member = cls.frombuf(header)
    # The header that was just read started one block earlier.
    member.offset = fileobj.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001255
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001256 #--------------------------------------------------------------------------
1257 # The following are methods that are called depending on the type of a
1258 # member. The entry point is _proc_member() which can be overridden in a
1259 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1260 # implement the following
1261 # operations:
1262 # 1. Set self.offset_data to the position where the data blocks begin,
1263 # if there is data that follows.
1264 # 2. Set tarfile.offset to the position where the next member's header will
1265 # begin.
1266 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Pick the processing method that matches self.type, call it
    with tarfile and return its result.
    """
    kind = self.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self._proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self._proc_sparse
    elif kind in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        handler = self._proc_pax
    else:
        handler = self._proc_builtin
    return handler(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001279
def _proc_builtin(self, tarfile):
    """Process a member of a builtin type; unknown types are handled
    like regular files.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    next_header = data_start
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # The data blocks of this member have to be skipped.
        next_header += self._block(self.size)
    tarfile.offset = next_header

    # Apply the global pax header information stored on tarfile.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1296
def _proc_gnulong(self, tarfile):
    """Process the blocks of a GNU longname or longlink member.

    The data blocks hold the long string; it is applied to the member
    described by the following header, which is returned. Raises
    SubsequentHeaderError if that header cannot be read.
    """
    buf = tarfile.fileobj.read(self._block(self.size))

    # The header that follows describes the actual member.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # The member starts where this long header started and gets the
    # long string as its name or linkname.
    member.offset = self.offset
    target = {GNUTYPE_LONGNAME: "name", GNUTYPE_LONGLINK: "linkname"}.get(self.type)
    if target is not None:
        setattr(member, target, nts(buf))

    return member
1318
1319 def _proc_sparse(self, tarfile):
1320 """Process a GNU sparse header plus extra headers.
1321 """
1322 buf = self.buf
1323 sp = _ringbuffer()
1324 pos = 386
1325 lastpos = 0L
1326 realpos = 0L
1327 # There are 4 possible sparse structs in the
1328 # first header.
1329 for i in xrange(4):
1330 try:
1331 offset = nti(buf[pos:pos + 12])
1332 numbytes = nti(buf[pos + 12:pos + 24])
1333 except ValueError:
1334 break
1335 if offset > lastpos:
1336 sp.append(_hole(lastpos, offset - lastpos))
1337 sp.append(_data(offset, numbytes, realpos))
1338 realpos += numbytes
1339 lastpos = offset + numbytes
1340 pos += 24
1341
1342 isextended = ord(buf[482])
1343 origsize = nti(buf[483:495])
1344
1345 # If the isextended flag is given,
1346 # there are extra headers to process.
1347 while isextended == 1:
1348 buf = tarfile.fileobj.read(BLOCKSIZE)
1349 pos = 0
1350 for i in xrange(21):
1351 try:
1352 offset = nti(buf[pos:pos + 12])
1353 numbytes = nti(buf[pos + 12:pos + 24])
1354 except ValueError:
1355 break
1356 if offset > lastpos:
1357 sp.append(_hole(lastpos, offset - lastpos))
1358 sp.append(_data(offset, numbytes, realpos))
1359 realpos += numbytes
1360 lastpos = offset + numbytes
1361 pos += 24
1362 isextended = ord(buf[504])
1363
1364 if lastpos < origsize:
1365 sp.append(_hole(lastpos, origsize - lastpos))
1366
1367 self.sparse = sp
1368
1369 self.offset_data = tarfile.fileobj.tell()
1370 tarfile.offset = self.offset_data + self._block(self.size)
1371 self.size = origsize
1372
1373 return self
1374
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
    POSIX.1-2001.

    The records of the header are parsed into a dictionary, the next
    header is read and, for an extended header, patched with the
    parsed information. Returns the TarInfo of that next header.
    Raises InvalidHeaderError for a record with a length of zero and
    SubsequentHeaderError if the following header cannot be read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global). A global header updates tarfile.pax_headers in place,
    # an extended header works on a copy.
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like this:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero length would never advance pos, so a crafted
            # archive could keep this loop running forever.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1432
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite fields of this TarInfo with the values of a pax
    extended or global header and keep a copy of pax_headers.

    Keywords outside PAX_FIELDS are skipped. Number fields whose
    conversion raises ValueError are set to 0.
    """
    for keyword, value in pax_headers.iteritems():
        if keyword not in PAX_FIELDS:
            continue

        if keyword == "path":
            value = value.rstrip("/")

        if keyword not in PAX_NUMBER_FIELDS:
            value = uts(value, encoding, errors)
        else:
            convert = PAX_NUMBER_FIELDS[keyword]
            try:
                value = convert(value)
            except ValueError:
                value = 0

        setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001455
def _block(self, count):
    """Return count rounded up to a multiple of BLOCKSIZE,
    e.g. _block(834) => 1024.
    """
    full, rest = divmod(count, BLOCKSIZE)
    if rest:
        # A partly filled block still occupies a whole block.
        full += 1
    return full * BLOCKSIZE
Georg Brandl3354f282006-10-29 09:16:12 +00001464
def isreg(self):
    """Return True if the member type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES

def isfile(self):
    """Return the result of isreg()."""
    return self.isreg()

def isdir(self):
    """Return True if the member type is DIRTYPE."""
    return self.type == DIRTYPE

def issym(self):
    """Return True if the member type is SYMTYPE."""
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the member type is LNKTYPE."""
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the member type is CHRTYPE."""
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the member type is BLKTYPE."""
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the member type is FIFOTYPE."""
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if the member type is GNUTYPE_SPARSE."""
    return self.type == GNUTYPE_SPARSE

def isdev(self):
    """Return True if the member type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1485# class TarInfo
1486
1487class TarFile(object):
1488 """The TarFile Class provides an interface to tar archives.
1489 """
1490
# Class-level defaults; __init__ overrides them per instance when the
# matching argument is given.

debug = 0                   # Message level, from 0 (no msgs) to 3 (all msgs).

dereference = False         # If true, the content of a linked file is
                            # added to the tar file instead of the link.

ignore_zeros = False        # If true, empty or invalid blocks are skipped
                            # and processing continues.

errorlevel = 1              # If 0, fatal errors only show up in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # Format used when an archive is created.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.

tarinfo = TarInfo           # Default TarInfo class to use.

fileobject = ExFileObject   # Default ExFileObject class to use.
1512
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open the (uncompressed) tar archive name.

    mode is 'r' to read an existing archive, 'a' to append to an
    existing file or 'w' to create a new file, overwriting an existing
    one; it defaults to 'r'. Any other value raises ValueError.
    If fileobj is given, it is used for reading or writing data and
    its mode attribute, if present, overrides the file mode. fileobj
    is not closed when the TarFile is closed.
    The remaining arguments override the class-level defaults of the
    same name when they are not None.
    """
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if fileobj:
        # Use the caller's file object, borrowing its name and mode
        # where they are available.
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Appending to a nonexistent file creates it.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False

    if name:
        self.name = os.path.abspath(name)
    else:
        self.name = None
    self.fileobj = fileobj

    # Per-instance overrides of the class defaults.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attr, given in overrides:
        if given is not None:
            setattr(self, attr, given)

    if errors is None:
        if mode == "r":
            errors = "utf-8"
        else:
            errors = "strict"
    self.errors = errors

    # Caller-supplied pax headers are only kept for the pax format.
    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    for attr, given in (("debug", debug), ("errorlevel", errorlevel)):
        if given is not None:
            setattr(self, attr, given)

    # Bookkeeping structures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Walk over the existing members to reach the end of the
            # archive, just before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))
                else:
                    self.members.append(member)

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Close a file that was opened here before passing the
        # error on.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001615
def _getposix(self):
    """Return True if the archive format is USTAR_FORMAT."""
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Select USTAR_FORMAT for a true value, GNU_FORMAT otherwise.

    Issues a DeprecationWarning that points to the format attribute.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT

posix = property(fget=_getposix, fset=_setposix)
1627
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001628 #--------------------------------------------------------------------------
1629 # Below are the classmethods which act as alternate constructors to the
1630 # TarFile class. The open() method is the only one that is needed for
1631 # public use; it is the "super"-constructor and is able to select an
1632 # adequate "sub"-constructor for a particular compression using the mapping
1633 # from OPEN_METH.
1634 #
1635 # This concept allows one to subclass TarFile without losing the comfort of
1636 # the super-constructor. A sub-constructor is registered and made available
1637 # by adding it to the mapping in OPEN_METH.
1638
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive and return an instance of the TarFile class
       it was called on. At least one of `name' and `fileobj' must be
       given; any extra keyword arguments are handed on unchanged.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError for a missing target or an unusable mode,
       CompressionError for an unregistered compression type and
       ReadError when no registered opener accepts the file.
    """
    if not (name or fileobj):
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Transparent mode: probe every registered opener in turn.
        # Sorting on this key moves the plain-tar opener to the end
        # (False sorts before True), so the compressed formats are
        # tried first.
        def is_plain_tar(key):
            return cls.OPEN_METH[key] == 'taropen'

        for key in sorted(cls.OPEN_METH, key=is_plain_tar):
            opener = getattr(cls, cls.OPEN_METH[key])
            # Remember where a caller-supplied file object stood so a
            # failed probe can be undone before the next attempt.
            if fileobj is not None:
                rewind_to = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(rewind_to)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        # "<filemode>:<comptype>" - random access with explicit compression.
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # The compression type selects the *open() method to delegate to.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        # "<filemode>|<comptype>" - the archive is wrapped in a _Stream.
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            # Do not leave the stream open when construction fails.
            stream.close()
            raise
        # The stream was created here, so close() has to close it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001716
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' by instantiating the
       class with the given arguments. `mode' must be 'r', 'a' or 'w',
       otherwise ValueError is raised.
    """
    if mode in ("r", "a", "w"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001724
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading ('r') or
       writing ('w'); appending is not allowed. The data is routed
       through a gzip.GzipFile which is then handed to taropen().

       Raises ValueError for any other mode, CompressionError if the
       gzip module cannot be used, and ReadError if the data cannot be
       read as a gzip compressed archive.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        # Touch the class so that an incomplete gzip module is caught here.
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        gzfile = gzip.GzipFile(name, mode, compresslevel, fileobj)
    except OSError:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, gzfile, **kwargs)
    except IOError:
        gzfile.close()
        # While reading, an I/O failure is reported as a format problem.
        if mode == 'r':
            raise ReadError("not a gzip file")
        raise
    except:
        # Whatever went wrong, do not leak the GzipFile created above.
        gzfile.close()
        raise
    # The GzipFile belongs to the archive and is closed along with it.
    archive._extfileobj = False
    return archive
1758
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading ('r') or
       writing ('w'); appending is not allowed. A given `fileobj' is
       wrapped in a _BZ2Proxy, otherwise `name' is opened with
       bz2.BZ2File; the result is handed to taropen().

       Raises ValueError for any other mode, CompressionError if the
       bz2 module is missing, and ReadError if the data cannot be read
       as a bzip2 compressed archive.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if fileobj is None:
        bzfile = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        bzfile = _BZ2Proxy(fileobj, mode)

    try:
        archive = cls.taropen(name, mode, bzfile, **kwargs)
    except (IOError, EOFError):
        bzfile.close()
        # While reading, these failures are reported as a format problem.
        if mode == 'r':
            raise ReadError("not a bzip2 file")
        raise
    except:
        # Whatever went wrong, do not leak the object created above.
        bzfile.close()
        raise
    # The bzip2 object belongs to the archive and is closed along with it.
    archive._extfileobj = False
    return archive
1789
# All *open() methods are registered here.
# The keys are the compression types that may follow the ":" in the mode
# string given to open(); each value is the name of the classmethod that
# open() looks up with getattr() and delegates to. In transparent read
# mode open() tries every entry, leaving "taropen" for last.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1796
1797 #--------------------------------------------------------------------------
1798 # The public methods which TarFile provides:
1799
def close(self):
    """Close the TarFile. When the archive was opened for writing or
       appending, two zero blocks are written as end marker and the
       file is padded with zeros up to a multiple of RECORDSIZE.
       Calling close() on an already closed TarFile does nothing.
    """
    if self.closed:
        return

    # Flag the archive as closed first; the flag stays set even if
    # writing the trailer below fails.
    self.closed = True
    try:
        if self.mode in "aw":
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        # A file object passed in from outside is left open.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001820
def getmember(self, name):
    """Return the TarInfo object stored for member `name'. KeyError is
       raised if the archive has no member of that name. The lookup is
       delegated to _getmember(), which searches from the end of the
       member list, so for a name that occurs more than once the last
       occurrence is returned.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001831
def getmembers(self):
    """Return the list of TarInfo objects held in self.members. If the
       archive has not been loaded completely yet, _load() is called
       first to scan it to the end.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires a scan of the whole archive.
    self._load()
    return self.members
1841
def getnames(self):
    """Return the names of all members as a list, in the order in which
       getmembers() returns the members.
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001847
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument.

       Returns the filled-in TarInfo object, or None if the file is of a
       type that is not handled here (not a regular file, directory,
       fifo, symbolic link, character or block device). The archive must
       be open in mode 'a' or 'w'.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    # The drive part is dropped; only the remaining path is stored.
    drv, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/")
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        # An open file is stat'ed through its descriptor.
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    # Derive the member type from the stat mode bits.
    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        # self.inodes maps (st_ino, st_dev) to the archive name under
        # which that file was first seen.
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and statres.st_nlink > 1 and \
                inode in self.inodes and arcname != self.inodes[inode]:
            # Is it a hardlink to an already
            # archived file?
            type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if it's valid.
            # For win32 it is always 0.
            type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        type = BLKTYPE
    else:
        # Any other kind of file is reported as unsupported.
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only members of type REGTYPE get a size; hard links (LNKTYPE) and
    # all other types are recorded with size 0.
    if type == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0L
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = type
    tarinfo.linkname = linkname
    # pwd and grp are tested for truth before use; an id without a
    # matching entry leaves uname/gname untouched.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    # Device numbers are recorded only for character and block devices,
    # and only where os.major() and os.minor() exist.
    if type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1946
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    # One output line per member. Every print statement below ends in a
    # comma so that the fields stay on the same line; the bare print at
    # the bottom of the loop terminates the line.
    for tarinfo in self:
        if verbose:
            # Mode string, then owner/group (the numeric id is used
            # when the name is empty).
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Devices show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Modification time in local time.
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        # Directories are marked with a trailing slash.
        print tarinfo.name + ("/" if tarinfo.isdir() else ""),

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
1975
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of
       file (directory, fifo, symbolic link, etc.). `arcname', if given,
       is the name the file gets inside the archive. A directory is
       added together with its contents unless `recursive' is False.
       `exclude' (deprecated, triggers a DeprecationWarning) is a
       function that is called with the filename and returns True if
       the file is to be left out. `filter' is a function that is
       called with the TarInfo object and returns the TarInfo object to
       store, or None to leave the member out.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Deprecated way of excluding pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                      DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Describe the file; gettarinfo() answers None for file types it
    # does not handle.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Let the filter modify or reject the member.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are stored together with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is stored as a header only; for a directory the
    # entries follow, each added through a recursive call.
    descend = tarinfo.isdir()
    self.addfile(tarinfo)
    if descend and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                     recursive, exclude, filter)
2036
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive. If
       `fileobj' is given, tarinfo.size bytes are read from it and
       written after the header. TarInfo objects can be created
       directly or obtained from gettarinfo(). On Windows platforms,
       `fileobj' should always be opened with mode 'rb' to avoid
       irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object stays untouched.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last block
    # with NULs up to a full BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002062
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the directory `path'
       (the current working directory by default). Directories are
       created with a safe mode first; their owner, modification time
       and permissions are set once everything has been extracted.
       `members' is optional and must be a subset of the list returned
       by getmembers().
    """
    pending_dirs = []

    if members is None:
        members = self

    for member in members:
        if member.isdir():
            # Keep the original for the fix-up pass below and extract
            # a copy that carries a safe mode.
            pending_dirs.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        self.extract(member, path)

    # Walk the directories in reverse name order.
    pending_dirs.sort(key=operator.attrgetter('name'))
    pending_dirs.reverse()

    # Set correct owner, mtime and filemode on directories.
    for member in pending_dirs:
        dirpath = os.path.join(path, member.name)
        try:
            self.chown(member, dirpath)
            self.utime(member, dirpath)
            self.chmod(member, dirpath)
        except ExtractError as e:
            # Reported through _dbg() unless errorlevel asks for more.
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2099
def extract(self, member, path=""):
    """Extract a member from the archive, using its full name, to the
       directory `path' (the current working directory by default).
       `member' may be a filename or a TarInfo object. The member's
       name is joined to `path' as it is. Depending on self.errorlevel,
       errors are either raised or only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        # Operating system errors are fatal from errorlevel 1 on.
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        # ExtractErrors are fatal from errorlevel 2 on.
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2132
def extractfile(self, member):
    """Return a file-like object for reading the data of `member',
       which may be a filename or a TarInfo object. Regular files and
       members of an unknown type yield a file object directly, links
       yield the file object of their target, and every other member
       (directory, device, ...) yields None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, basestring):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Regular files, and members whose type is unknown (these are
    # treated like regular files), are served straight from the archive.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2170
2171 def _extract_member(self, tarinfo, targetpath):
2172 """Extract the TarInfo object tarinfo to a physical
2173 file called targetpath.
2174 """
2175 # Fetch the TarInfo object for the given name
2176 # and build the destination pathname, replacing
2177 # forward slashes to platform specific separators.
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002178 targetpath = targetpath.rstrip("/")
2179 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002180
2181 # Create all upper directories.
2182 upperdirs = os.path.dirname(targetpath)
2183 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel0192e432008-02-05 11:51:40 +00002184 # Create directories that are not part of the archive with
2185 # default permissions.
Lars Gustäbeld2e22902007-01-23 11:17:33 +00002186 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002187
2188 if tarinfo.islnk() or tarinfo.issym():
2189 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2190 else:
2191 self._dbg(1, tarinfo.name)
2192
2193 if tarinfo.isreg():
2194 self.makefile(tarinfo, targetpath)
2195 elif tarinfo.isdir():
2196 self.makedir(tarinfo, targetpath)
2197 elif tarinfo.isfifo():
2198 self.makefifo(tarinfo, targetpath)
2199 elif tarinfo.ischr() or tarinfo.isblk():
2200 self.makedev(tarinfo, targetpath)
2201 elif tarinfo.islnk() or tarinfo.issym():
2202 self.makelink(tarinfo, targetpath)
2203 elif tarinfo.type not in SUPPORTED_TYPES:
2204 self.makeunknown(tarinfo, targetpath)
2205 else:
2206 self.makefile(tarinfo, targetpath)
2207
2208 self.chown(tarinfo, targetpath)
2209 if not tarinfo.issym():
2210 self.chmod(tarinfo, targetpath)
2211 self.utime(tarinfo, targetpath)
2212
2213 #--------------------------------------------------------------------------
2214 # Below are the different file methods. They are called via
2215 # _extract_member() when extract() is called. They can be replaced in a
2216 # subclass to implement other functionality.
2217
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that exists
       already is not an error.
    """
    try:
        # Use a safe mode for the directory, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as err:
        already_there = err.errno == errno.EEXIST
        if not already_there:
            raise
2228
def makefile(self, tarinfo, targetpath):
    """Create the file `targetpath' and fill it with the data of the
       member `tarinfo'.
    """
    member_data = self.extractfile(tarinfo)
    try:
        # The output file is closed by the with statement, the
        # member's file object by the finally clause.
        with bltn_open(targetpath, "wb") as outfile:
            copyfileobj(member_data, outfile)
    finally:
        member_data.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002238
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' the way a
       regular file is extracted and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2246
def makefifo(self, tarinfo, targetpath):
    """Create the fifo `targetpath'. ExtractError is raised where
       os.mkfifo() is not available.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002254
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath'. ExtractError
       is raised where os.mknod() or os.makedev() is not available.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode with the device type bit: block device
    # if the member says so, character device otherwise.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2269
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'. Where links
       cannot be created (platform limitation), or the target of a
       hard link does not exist on disk, the member the link refers to
       is extracted to `targetpath' instead.
    """
    if not (hasattr(os, "symlink") and hasattr(os, "link")):
        # No link support: fall back to a copy of the referenced member.
        try:
            self._extract_member(self._find_link_target(tarinfo), targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
        return

    if tarinfo.issym():
        # Anything already at the target path is replaced.
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.symlink(tarinfo.linkname, targetpath)
        return

    # Hard link; _link_target has been prepared by extract().
    link_source = tarinfo._link_target
    if not os.path.exists(link_source):
        self._extract_member(self._find_link_target(tarinfo), targetpath)
        return
    if os.path.lexists(targetpath):
        os.unlink(targetpath)
    os.link(link_source, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002294
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'. Nothing
       is done unless the process runs with effective user id 0.
       ExtractError is raised if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Look the ids up by name; the numeric ids stored in the member
    # are used for names that cannot be found.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        g = tarinfo.gid
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        u = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself where the platform allows it.
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002316
def chmod(self, tarinfo, targetpath):
    """Set the file permissions of `targetpath' to tarinfo.mode.
       Nothing is done where os.chmod() does not exist; ExtractError
       is raised if the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002325
def utime(self, tarinfo, targetpath):
    """Set the access and modification time of `targetpath' to
       tarinfo.mtime. Nothing is done where os.utime() does not exist;
       ExtractError is raised if the call fails.
    """
    if hasattr(os, 'utime'):
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002335
2336 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       A member that is returned is also appended to self.members; when
       None is returned, self._loaded is set to True. ReadError is
       raised for a header problem at offset 0, for a bad header that
       follows another header, and when the data in front of the
       expected header position is missing.
    """
    self._check("ra")
    # A member stored in self.firstmember is handed out once, before
    # anything is read from the file.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer.
    # Seek to the byte in front of the next header and read it; if it
    # cannot be read, the file ends before the expected position.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read the next block.
    # Every handler below that neither continues nor raises falls
    # through to the break with tarinfo still None, which is reported
    # to the caller as the end of the archive.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError, e:
            # With ignore_zeros set, skip this block and try the next.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError, e:
            # With ignore_zeros set, skip this block and try the next;
            # otherwise this is an error only at the very start.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError, e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError, e:
            # Always an error, regardless of the offset.
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more to read: the member list is complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002387
2388 #--------------------------------------------------------------------------
2389 # Little helper methods:
2390
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002391 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002392 """Find an archive member by name from bottom to top.
2393 If tarinfo is given, it is used as the starting point.
2394 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002395 # Ensure that all members have been loaded.
2396 members = self.getmembers()
2397
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002398 # Limit the member search list up to tarinfo.
2399 if tarinfo is not None:
2400 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002401
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002402 if normalize:
2403 name = os.path.normpath(name)
2404
2405 for member in reversed(members):
2406 if normalize:
2407 member_name = os.path.normpath(member.name)
2408 else:
2409 member_name = member.name
2410
2411 if name == member_name:
2412 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002413
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002414 def _load(self):
2415 """Read through the entire archive file and look for readable
2416 members.
2417 """
2418 while True:
2419 tarinfo = self.next()
2420 if tarinfo is None:
2421 break
2422 self._loaded = True
2423
2424 def _check(self, mode=None):
2425 """Check if TarFile is still open, and if the operation's mode
2426 corresponds to TarFile's mode.
2427 """
2428 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002429 raise IOError("%s is closed" % self.__class__.__name__)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002430 if mode is not None and self.mode not in mode:
2431 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002432
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002433 def _find_link_target(self, tarinfo):
2434 """Find the target member of a symlink or hardlink member in the
2435 archive.
2436 """
2437 if tarinfo.issym():
2438 # Always search the entire archive.
Lars Gustäbel231d4742012-04-24 22:42:08 +02002439 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
Lars Gustäbel4da7d412010-06-03 12:34:14 +00002440 limit = None
2441 else:
2442 # Search the archive before the link, because a hard link is
2443 # just a reference to an already archived file.
2444 linkname = tarinfo.linkname
2445 limit = tarinfo
2446
2447 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2448 if member is None:
2449 raise KeyError("linkname %r not found" % linkname)
2450 return member
2451
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002452 def __iter__(self):
2453 """Provide an iterator object.
2454 """
2455 if self._loaded:
2456 return iter(self.members)
2457 else:
2458 return TarIter(self)
2459
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002460 def _dbg(self, level, msg):
2461 """Write debugging output to sys.stderr.
2462 """
2463 if level <= self.debug:
2464 print >> sys.stderr, msg
Lars Gustäbel64581042010-03-03 11:55:48 +00002465
2466 def __enter__(self):
2467 self._check()
2468 return self
2469
2470 def __exit__(self, type, value, traceback):
2471 if type is None:
2472 self.close()
2473 else:
2474 # An exception occurred. We must not call close() because
2475 # it would try to write end-of-archive blocks and padding.
2476 if not self._extfileobj:
2477 self.fileobj.close()
2478 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002479# class TarFile
2480
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile, starting at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, reading it with TarFile's next()
           method if it is not yet in the member list. Once all members
           have been read, flag the TarFile as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile
        position = self.index

        if position == 0 and archive.firstmember is not None:
            entry = archive.next()
        elif position < len(archive.members):
            # Already read by someone else; serve it from the list.
            entry = archive.members[position]
        elif archive._loaded:
            raise StopIteration
        else:
            entry = archive.next()
            if not entry:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return entry
2518
2519# Helper classes for sparse file support
2520class _section:
2521 """Base class for _data and _hole.
2522 """
2523 def __init__(self, offset, size):
2524 self.offset = offset
2525 self.size = size
2526 def __contains__(self, offset):
2527 return self.offset <= offset < self.offset + self.size
2528
class _data(_section):
    """A data section of a sparse file. In addition to offset and
       size it stores realpos.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2535
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section;
       the class only serves to tell holes apart from data.
    """
2540
2541class _ringbuffer(list):
2542 """Ringbuffer class which increases performance
2543 over a regular list.
2544 """
2545 def __init__(self):
2546 self.idx = 0
2547 def find(self, offset):
2548 idx = self.idx
2549 while True:
2550 item = self[idx]
2551 if offset in item:
2552 break
2553 idx += 1
2554 if idx == len(self):
2555 idx = 0
2556 if idx == self.idx:
2557 # End of File
2558 return None
2559 self.idx = idx
2560 return item
2561
2562#---------------------------------------------
2563# zipfile compatible TarFile class
2564#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() or TarFile.gzopen(),
           depending on compression (TAR_PLAIN or TAR_GZIPPED). Any
           other value raises ValueError. In read mode every member
           gets zipfile-style filename, file_size and date_time
           attributes.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            for info in self.tarfile.getmembers():
                info.filename = info.name
                info.file_size = info.size
                info.date_time = time.gmtime(info.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [info.name for info in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [info for info in self.tarfile.getmembers()
                if info.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to the TarFile's list() method.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname. compress_type is
           accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, taking name and
           modification time from zinfo's filename and date_time.
        """
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        entry = TarInfo(zinfo.filename)
        entry.size = len(bytes)
        entry.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(entry, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2614#class TarFileCompat
2615
2616#--------------------
2617# exported functions
2618#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) with this
       module's open() without a TarError, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2629
# Make TarFile.open available as the module-level open() function.
open = TarFile.open