blob: b6dc3ee4b39549c9094fdfd7779f6986c65bad22 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Lars Gustäbelc64e4022007-03-13 10:47:19 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000053import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
if sys.platform == 'mac':
    # Classic MacOS (OS 9) is not supported.  Throughout this module a
    # pathname is converted by simply swapping "/" for the local
    # os.path.sep, which is not enough for the mac syntax of
    # rooted:path:name versus :nonrooted:path:name.
    raise ImportError("tarfile does not work for platform==mac")
61
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000062try:
63 import grp, pwd
64except ImportError:
65 grp = pwd = None
66
67# from tarfile import *
68__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69
70#---------------------------------------------------------
71# tar constants
72#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8 bytes of the header's magic + version
# area.  GNU tar writes "ustar" followed by TWO spaces and a NUL; POSIX
# writes "ustar", a NUL and the version digits "00".
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000078
# Field widths used when a header is built.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# One-character member type codes.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# Type codes that are GNU tar extensions.
GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

# Type codes of extended (pax-style) headers.
XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers for the archive formats this module can write; they are
# the values accepted by the "format" arguments further down.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000105
106#---------------------------------------------------------
107# tarfile constants
108#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
# ("path" and "linkpath" are the pax names for "name" and "linkname".)
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000127
128#---------------------------------------------------------
129# Bits used in the mode field, values in octal.
130#---------------------------------------------------------
# File-type bits (consulted first by filemode_table below).
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Ordinary permission bits for owner, group and other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
151
152#---------------------------------------------------------
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000153# initialization
154#---------------------------------------------------------
# Default character encoding, used as the default "encoding" argument of
# TarInfo.tobuf().  sys.getfilesystemencoding() may return None, in which
# case plain ASCII is used.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = "ascii"
158
159#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000160# Some useful functions
161#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000162
def stn(s, length):
    """Return s as a fixed-width field of exactly length characters.

       s is cut off after length characters if it is too long and
       filled up with NUL characters if it is too short.
    """
    # A negative repeat count yields "", so over-long input gets no padding.
    padding = NUL * (length - len(s))
    return s[:length] + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000167
def nts(s):
    """Return the part of the field s that precedes its first null
       character, or all of s if it contains none.
    """
    end = s.find("\0")
    return s if end == -1 else s[:end]
176
Georg Brandl38c6a222006-05-10 16:26:03 +0000177def nti(s):
178 """Convert a number field to a python number.
179 """
180 # There are two possible encodings for a number field, see
181 # itn() below.
182 if s[0] != chr(0200):
Georg Brandlded1c4d2006-12-20 11:55:16 +0000183 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000184 n = int(nts(s) or "0", 8)
Georg Brandlded1c4d2006-12-20 11:55:16 +0000185 except ValueError:
186 raise HeaderError("invalid header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000187 else:
188 n = 0L
189 for i in xrange(len(s) - 1):
190 n <<= 8
191 n += ord(s[i + 1])
192 return n
193
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000194def itn(n, digits=8, format=DEFAULT_FORMAT):
Georg Brandl38c6a222006-05-10 16:26:03 +0000195 """Convert a python number to a number field.
196 """
197 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
198 # octal digits followed by a null-byte, this allows values up to
199 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
200 # that if necessary. A leading 0200 byte indicates this particular
201 # encoding, the following digits-1 bytes are a big-endian
202 # representation. This allows values up to (256**(digits-1))-1.
203 if 0 <= n < 8 ** (digits - 1):
204 s = "%0*o" % (digits - 1, n) + NUL
205 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000206 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
Georg Brandle4751e32006-05-18 06:11:19 +0000207 raise ValueError("overflow in number field")
Georg Brandl38c6a222006-05-10 16:26:03 +0000208
209 if n < 0:
210 # XXX We mimic GNU tar's behaviour with negative numbers,
211 # this could raise OverflowError.
212 n = struct.unpack("L", struct.pack("l", n))[0]
213
214 s = ""
215 for i in xrange(digits - 1):
216 s = chr(n & 0377) + s
217 n >>= 8
218 s = chr(0200) + s
219 return s
220
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of the header block buf.

       All characters of the first 512 bytes are summed up, except the
       eight of the chksum field (offsets 148-155), which count as if
       they held spaces.  According to the GNU tar sources, some tars
       (Sun and NeXT) sum signed chars, which gives a different result
       when bytes with the high bit set are present, so both variants
       are computed.
    """
    block = buf[:512]
    # "8x" skips the chksum field; the constant 256 is what its eight
    # bytes contribute when read as spaces (8 * 32).
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", block))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", block))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       With length None everything up to the end of src is copied.
       IOError is raised if src runs out of data before length bytes
       have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means src ended early.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    # Full-sized chunks first, then whatever is left over.
    full_chunks, tail = divmod(length, BUFSIZE)
    done = 0
    while done < full_chunks:
        transfer(BUFSIZE)
        done += 1

    if tail != 0:
        transfer(tail)
    return
258
# Lookup table for filemode().  Each of the ten inner tuples stands for
# one character of the "-rwxrwxrwx" string and lists (bits, character)
# pairs.  filemode() takes the first pair whose bits are all set, so a
# combined entry such as TUEXEC|TSUID has to come before the single
# bits it is made of.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000285
def filemode(mode):
    """Render the mode bits of a file as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for group in filemode_table:
        # "-" stays unless one of the group's bit patterns is fully set;
        # the first match wins.
        symbol = "-"
        for bit, char in group:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000300
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        # Normalize with the local rules, then switch to the forward
        # slashes that are used inside archives.
        return os.path.normpath(path).replace(os.sep, "/")
305
class TarError(Exception):
    """Base class of all tarfile exceptions."""

class ExtractError(TarError):
    """Raised for general errors during extraction."""

class ReadError(TarError):
    """Raised for unreadable tar archives."""

class CompressionError(TarError):
    """Raised when a compression method is not available."""

class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""

class HeaderError(TarError):
    """Raised for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000324
325#---------------------------
326# internal stream interface
327#---------------------------
328class _LowLevelFile:
329 """Low-level file object. Supports reading and writing.
330 It is used instead of a regular file object for streaming
331 access.
332 """
333
334 def __init__(self, name, mode):
335 mode = {
336 "r": os.O_RDONLY,
337 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
338 }[mode]
339 if hasattr(os, "O_BINARY"):
340 mode |= os.O_BINARY
341 self.fd = os.open(name, mode)
342
343 def close(self):
344 os.close(self.fd)
345
346 def read(self, size):
347 return os.read(self.fd, size)
348
349 def write(self, s):
350 os.write(self.fd, s)
351
352class _Stream:
353 """Class that serves as an adapter between TarFile and
354 a stream-like object. The stream-like object only
355 needs to have a read() or write() method and is accessed
356 blockwise. Use of gzip or bzip2 compression is possible.
357 A stream-like object could be for example: sys.stdin,
358 sys.stdout, a socket, a tape device etc.
359
360 _Stream is intended to be used only internally.
361 """
362
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000363 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000364 """Construct a _Stream object.
365 """
366 self._extfileobj = True
367 if fileobj is None:
368 fileobj = _LowLevelFile(name, mode)
369 self._extfileobj = False
370
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000371 if comptype == '*':
372 # Enable transparent compression detection for the
373 # stream interface
374 fileobj = _StreamProxy(fileobj)
375 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000377 self.name = name or ""
378 self.mode = mode
379 self.comptype = comptype
380 self.fileobj = fileobj
381 self.bufsize = bufsize
382 self.buf = ""
383 self.pos = 0L
384 self.closed = False
385
386 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000387 try:
388 import zlib
389 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000390 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000391 self.zlib = zlib
392 self.crc = zlib.crc32("")
393 if mode == "r":
394 self._init_read_gz()
395 else:
396 self._init_write_gz()
397
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000398 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000399 try:
400 import bz2
401 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000402 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000403 if mode == "r":
404 self.dbuf = ""
405 self.cmp = bz2.BZ2Decompressor()
406 else:
407 self.cmp = bz2.BZ2Compressor()
408
409 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000411 self.close()
412
413 def _init_write_gz(self):
414 """Initialize for writing with gzip compression.
415 """
416 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
417 -self.zlib.MAX_WBITS,
418 self.zlib.DEF_MEM_LEVEL,
419 0)
420 timestamp = struct.pack("<L", long(time.time()))
421 self.__write("\037\213\010\010%s\002\377" % timestamp)
422 if self.name.endswith(".gz"):
423 self.name = self.name[:-3]
424 self.__write(self.name + NUL)
425
426 def write(self, s):
427 """Write string s to the stream.
428 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000429 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000430 self.crc = self.zlib.crc32(s, self.crc)
431 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000432 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000433 s = self.cmp.compress(s)
434 self.__write(s)
435
436 def __write(self, s):
437 """Write string s to the stream if a whole new block
438 is ready to be written.
439 """
440 self.buf += s
441 while len(self.buf) > self.bufsize:
442 self.fileobj.write(self.buf[:self.bufsize])
443 self.buf = self.buf[self.bufsize:]
444
445 def close(self):
446 """Close the _Stream object. No operation should be
447 done on it afterwards.
448 """
449 if self.closed:
450 return
451
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000452 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000453 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000454
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000455 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000456 self.fileobj.write(self.buf)
457 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000458 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000459 # The native zlib crc is an unsigned 32-bit integer, but
460 # the Python wrapper implicitly casts that to a signed C
461 # long. So, on a 32-bit box self.crc may "look negative",
462 # while the same crc on a 64-bit box may "look positive".
463 # To avoid irksome warnings from the `struct` module, force
464 # it to look positive on all boxes.
465 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000466 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000467
468 if not self._extfileobj:
469 self.fileobj.close()
470
471 self.closed = True
472
473 def _init_read_gz(self):
474 """Initialize for reading a gzip compressed fileobj.
475 """
476 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
477 self.dbuf = ""
478
479 # taken from gzip.GzipFile with some alterations
480 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000481 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000482 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000483 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000484
485 flag = ord(self.__read(1))
486 self.__read(6)
487
488 if flag & 4:
489 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
490 self.read(xlen)
491 if flag & 8:
492 while True:
493 s = self.__read(1)
494 if not s or s == NUL:
495 break
496 if flag & 16:
497 while True:
498 s = self.__read(1)
499 if not s or s == NUL:
500 break
501 if flag & 2:
502 self.__read(2)
503
504 def tell(self):
505 """Return the stream's file pointer position.
506 """
507 return self.pos
508
509 def seek(self, pos=0):
510 """Set the stream's file pointer to pos. Negative seeking
511 is forbidden.
512 """
513 if pos - self.pos >= 0:
514 blocks, remainder = divmod(pos - self.pos, self.bufsize)
515 for i in xrange(blocks):
516 self.read(self.bufsize)
517 self.read(remainder)
518 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000519 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000520 return self.pos
521
522 def read(self, size=None):
523 """Return the next size number of bytes from the stream.
524 If size is not defined, return all bytes of the stream
525 up to EOF.
526 """
527 if size is None:
528 t = []
529 while True:
530 buf = self._read(self.bufsize)
531 if not buf:
532 break
533 t.append(buf)
534 buf = "".join(t)
535 else:
536 buf = self._read(size)
537 self.pos += len(buf)
538 return buf
539
540 def _read(self, size):
541 """Return size bytes from the stream.
542 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000543 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000544 return self.__read(size)
545
546 c = len(self.dbuf)
547 t = [self.dbuf]
548 while c < size:
549 buf = self.__read(self.bufsize)
550 if not buf:
551 break
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000552 try:
553 buf = self.cmp.decompress(buf)
554 except IOError:
555 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000556 t.append(buf)
557 c += len(buf)
558 t = "".join(t)
559 self.dbuf = t[size:]
560 return t[:size]
561
562 def __read(self, size):
563 """Return size bytes from stream. If internal buffer is empty,
564 read another block from the stream.
565 """
566 c = len(self.buf)
567 t = [self.buf]
568 while c < size:
569 buf = self.fileobj.read(self.bufsize)
570 if not buf:
571 break
572 t.append(buf)
573 c += len(buf)
574 t = "".join(t)
575 self.buf = t[size:]
576 return t[:size]
577# class _Stream
578
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000579class _StreamProxy(object):
580 """Small proxy class that enables transparent compression
581 detection for the Stream interface (mode 'r|*').
582 """
583
584 def __init__(self, fileobj):
585 self.fileobj = fileobj
586 self.buf = self.fileobj.read(BLOCKSIZE)
587
588 def read(self, size):
589 self.read = self.fileobj.read
590 return self.buf
591
592 def getcomptype(self):
593 if self.buf.startswith("\037\213\010"):
594 return "gz"
595 if self.buf.startswith("BZh91"):
596 return "bz2"
597 return "tar"
598
599 def close(self):
600 self.fileobj.close()
601# class StreamProxy
602
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000603class _BZ2Proxy(object):
604 """Small proxy class that enables external file object
605 support for "r:bz2" and "w:bz2" modes. This is actually
606 a workaround for a limitation in bz2 module's BZ2File
607 class which (unlike gzip.GzipFile) has no support for
608 a file object argument.
609 """
610
611 blocksize = 16 * 1024
612
613 def __init__(self, fileobj, mode):
614 self.fileobj = fileobj
615 self.mode = mode
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000616 self.name = getattr(self.fileobj, "name", None)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000617 self.init()
618
619 def init(self):
620 import bz2
621 self.pos = 0
622 if self.mode == "r":
623 self.bz2obj = bz2.BZ2Decompressor()
624 self.fileobj.seek(0)
625 self.buf = ""
626 else:
627 self.bz2obj = bz2.BZ2Compressor()
628
629 def read(self, size):
630 b = [self.buf]
631 x = len(self.buf)
632 while x < size:
633 try:
634 raw = self.fileobj.read(self.blocksize)
635 data = self.bz2obj.decompress(raw)
636 b.append(data)
637 except EOFError:
638 break
639 x += len(data)
640 self.buf = "".join(b)
641
642 buf = self.buf[:size]
643 self.buf = self.buf[size:]
644 self.pos += len(buf)
645 return buf
646
647 def seek(self, pos):
648 if pos < self.pos:
649 self.init()
650 self.read(pos - self.pos)
651
652 def tell(self):
653 return self.pos
654
655 def write(self, data):
656 self.pos += len(data)
657 raw = self.bz2obj.compress(data)
658 self.fileobj.write(raw)
659
660 def close(self):
661 if self.mode == "w":
662 raw = self.bz2obj.flush()
663 self.fileobj.write(raw)
Georg Brandle8953182006-05-27 14:02:03 +0000664 self.fileobj.close()
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000665# class _BZ2Proxy
666
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000667#------------------------
668# Extraction file object
669#------------------------
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000670class _FileInFile(object):
671 """A thin wrapper around an existing file object that
672 provides a part of its data as an individual file
673 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000674 """
675
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000676 def __init__(self, fileobj, offset, size, sparse=None):
677 self.fileobj = fileobj
678 self.offset = offset
679 self.size = size
680 self.sparse = sparse
681 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000682
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000683 def tell(self):
684 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000685 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000686 return self.position
687
688 def seek(self, position):
689 """Seek to a position in the file.
690 """
691 self.position = position
692
693 def read(self, size=None):
694 """Read data from the file.
695 """
696 if size is None:
697 size = self.size - self.position
698 else:
699 size = min(size, self.size - self.position)
700
701 if self.sparse is None:
702 return self.readnormal(size)
703 else:
704 return self.readsparse(size)
705
706 def readnormal(self, size):
707 """Read operation for regular files.
708 """
709 self.fileobj.seek(self.offset + self.position)
710 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000711 return self.fileobj.read(size)
712
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000713 def readsparse(self, size):
714 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000715 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000716 data = []
717 while size > 0:
718 buf = self.readsparsesection(size)
719 if not buf:
720 break
721 size -= len(buf)
722 data.append(buf)
723 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000724
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000725 def readsparsesection(self, size):
726 """Read a single section of a sparse file.
727 """
728 section = self.sparse.find(self.position)
729
730 if section is None:
731 return ""
732
733 size = min(size, section.offset + section.size - self.position)
734
735 if isinstance(section, _data):
736 realpos = section.realpos + self.position - section.offset
737 self.fileobj.seek(self.offset + realpos)
738 self.position += size
739 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000740 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000741 self.position += size
742 return NUL * size
743#class _FileInFile
744
745
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       Data is fetched through a _FileInFile object.  readline()
       reads ahead in pieces of blocksize bytes and keeps the
       surplus in self.buffer; self.position is the position as
       seen by the user.
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member tarinfo of the open
           archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if size is not None and size < 0:
            # Same convention as for regular files.  A negative count
            # must not be handed down, it would make the underlying
            # object read data that lies outside of this member.
            size = None

        buf = ""
        if self.buffer:
            # Data left over from readline() comes first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read on until a newline or the end of the data shows up.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Every negative size (and None) means "no limit"; used as a
        # slice bound it would cut the line at the wrong place.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position is
           clamped to the range from 0 to the size of the member.
           ValueError is raised for an unknown whence.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Data that was read ahead belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
873
874#------------------
875# Exported Classes
876#------------------
877class TarInfo(object):
878 """Informational class which holds the details about an
879 archive member given by a tar header block.
880 TarInfo objects are returned by TarFile.getmember(),
881 TarFile.getmembers() and TarFile.gettarinfo() and are
882 usually created internally.
883 """
884
885 def __init__(self, name=""):
886 """Construct a TarInfo object. name is the optional name
887 of the member.
888 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000889 self.name = name # member name
890 self.mode = 0644 # file permissions
Georg Brandl38c6a222006-05-10 16:26:03 +0000891 self.uid = 0 # user id
892 self.gid = 0 # group id
893 self.size = 0 # file size
894 self.mtime = 0 # modification time
895 self.chksum = 0 # header checksum
896 self.type = REGTYPE # member type
897 self.linkname = "" # link name
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000898 self.uname = "root" # user name
899 self.gname = "root" # group name
Georg Brandl38c6a222006-05-10 16:26:03 +0000900 self.devmajor = 0 # device major number
901 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000902
Georg Brandl38c6a222006-05-10 16:26:03 +0000903 self.offset = 0 # the tar header starts here
904 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000905
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000906 self.pax_headers = {} # pax header information
907
908 # In pax headers the "name" and "linkname" field are called
909 # "path" and "linkpath".
910 def _getpath(self):
911 return self.name
912 def _setpath(self, name):
913 self.name = name
914 path = property(_getpath, _setpath)
915
916 def _getlinkpath(self):
917 return self.linkname
918 def _setlinkpath(self, linkname):
919 self.linkname = linkname
920 linkpath = property(_getlinkpath, _setlinkpath)
921
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000922 def __repr__(self):
923 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
924
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000925 def get_info(self):
926 """Return the TarInfo's attributes as a dictionary.
927 """
928 info = {
929 "name": normpath(self.name),
930 "mode": self.mode & 07777,
931 "uid": self.uid,
932 "gid": self.gid,
933 "size": self.size,
934 "mtime": self.mtime,
935 "chksum": self.chksum,
936 "type": self.type,
937 "linkname": normpath(self.linkname) if self.linkname else "",
938 "uname": self.uname,
939 "gname": self.gname,
940 "devmajor": self.devmajor,
941 "devminor": self.devminor
942 }
943
944 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
945 info["name"] += "/"
946
947 return info
948
949 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING):
950 """Return a tar header as a string of 512 byte blocks.
951 """
952 if format == USTAR_FORMAT:
953 return self.create_ustar_header()
954 elif format == GNU_FORMAT:
955 return self.create_gnu_header()
956 elif format == PAX_FORMAT:
957 return self.create_pax_header(encoding)
958 else:
959 raise ValueError("invalid format")
960
961 def create_ustar_header(self):
962 """Return the object as a ustar header block.
963 """
964 info = self.get_info()
965 info["magic"] = POSIX_MAGIC
966
967 if len(info["linkname"]) > LENGTH_LINK:
968 raise ValueError("linkname is too long")
969
970 if len(info["name"]) > LENGTH_NAME:
971 info["prefix"], info["name"] = self._posix_split_name(info["name"])
972
973 return self._create_header(info, USTAR_FORMAT)
974
975 def create_gnu_header(self):
976 """Return the object as a GNU header block sequence.
977 """
978 info = self.get_info()
979 info["magic"] = GNU_MAGIC
980
981 buf = ""
982 if len(info["linkname"]) > LENGTH_LINK:
983 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
984
985 if len(info["name"]) > LENGTH_NAME:
986 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
987
988 return buf + self._create_header(info, GNU_FORMAT)
989
990 def create_pax_header(self, encoding):
991 """Return the object as a ustar header block. If it cannot be
992 represented this way, prepend a pax extended header sequence
993 with supplement information.
994 """
995 info = self.get_info()
996 info["magic"] = POSIX_MAGIC
997 pax_headers = self.pax_headers.copy()
998
999 # Test string fields for values that exceed the field length or cannot
1000 # be represented in ASCII encoding.
1001 for name, hname, length in (
1002 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1003 ("uname", "uname", 32), ("gname", "gname", 32)):
1004
1005 val = info[name].decode(encoding)
1006
1007 # Try to encode the string as ASCII.
1008 try:
1009 val.encode("ascii")
1010 except UnicodeEncodeError:
1011 pax_headers[hname] = val
1012 continue
1013
1014 if len(val) > length:
1015 if name == "name":
1016 # Try to squeeze a longname in the prefix and name fields as in
1017 # ustar format.
1018 try:
1019 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1020 except ValueError:
1021 pax_headers[hname] = val
1022 else:
1023 continue
1024 else:
1025 pax_headers[hname] = val
1026
1027 # Test number fields for values that exceed the field limit or values
1028 # that like to be stored as float.
1029 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1030 val = info[name]
1031 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1032 pax_headers[name] = unicode(val)
1033 info[name] = 0
1034
1035 if pax_headers:
1036 buf = self._create_pax_generic_header(pax_headers)
1037 else:
1038 buf = ""
1039
1040 return buf + self._create_header(info, USTAR_FORMAT)
1041
1042 @classmethod
1043 def create_pax_global_header(cls, pax_headers, encoding):
1044 """Return the object as a pax global header block sequence.
1045 """
1046 new_headers = {}
1047 for key, val in pax_headers.iteritems():
1048 key = cls._to_unicode(key, encoding)
1049 val = cls._to_unicode(val, encoding)
1050 new_headers[key] = val
1051 return cls._create_pax_generic_header(new_headers, type=XGLTYPE)
1052
1053 @staticmethod
1054 def _to_unicode(value, encoding):
1055 if isinstance(value, unicode):
1056 return value
1057 elif isinstance(value, (int, long, float)):
1058 return unicode(value)
1059 elif isinstance(value, str):
1060 return unicode(value, encoding)
1061 else:
1062 raise ValueError("unable to convert to unicode: %r" % value)
1063
1064 def _posix_split_name(self, name):
1065 """Split a name longer than 100 chars into a prefix
1066 and a name part.
1067 """
1068 prefix = name[:LENGTH_PREFIX + 1]
1069 while prefix and prefix[-1] != "/":
1070 prefix = prefix[:-1]
1071
1072 name = name[len(prefix):]
1073 prefix = prefix[:-1]
1074
1075 if not prefix or len(name) > LENGTH_NAME:
1076 raise ValueError("name is too long")
1077 return prefix, name
1078
1079 @staticmethod
1080 def _create_header(info, format):
1081 """Return a header block. info is a dictionary with file
1082 information, format must be one of the *_FORMAT constants.
1083 """
1084 parts = [
1085 stn(info.get("name", ""), 100),
1086 itn(info.get("mode", 0) & 07777, 8, format),
1087 itn(info.get("uid", 0), 8, format),
1088 itn(info.get("gid", 0), 8, format),
1089 itn(info.get("size", 0), 12, format),
1090 itn(info.get("mtime", 0), 12, format),
1091 " ", # checksum field
1092 info.get("type", REGTYPE),
1093 stn(info.get("linkname", ""), 100),
1094 stn(info.get("magic", ""), 8),
1095 stn(info.get("uname", ""), 32),
1096 stn(info.get("gname", ""), 32),
1097 itn(info.get("devmajor", 0), 8, format),
1098 itn(info.get("devminor", 0), 8, format),
1099 stn(info.get("prefix", ""), 155)
1100 ]
1101
1102 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1103 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1104 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1105 return buf
1106
1107 @staticmethod
1108 def _create_payload(payload):
1109 """Return the string payload filled with zero bytes
1110 up to the next 512 byte border.
1111 """
1112 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1113 if remainder > 0:
1114 payload += (BLOCKSIZE - remainder) * NUL
1115 return payload
1116
1117 @classmethod
1118 def _create_gnu_long_header(cls, name, type):
1119 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1120 for name.
1121 """
1122 name += NUL
1123
1124 info = {}
1125 info["name"] = "././@LongLink"
1126 info["type"] = type
1127 info["size"] = len(name)
1128 info["magic"] = GNU_MAGIC
1129
1130 # create extended header + name blocks.
1131 return cls._create_header(info, USTAR_FORMAT) + \
1132 cls._create_payload(name)
1133
1134 @classmethod
1135 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1136 """Return a POSIX.1-2001 extended or global header sequence
1137 that contains a list of keyword, value pairs. The values
1138 must be unicode objects.
1139 """
1140 records = []
1141 for keyword, value in pax_headers.iteritems():
1142 keyword = keyword.encode("utf8")
1143 value = value.encode("utf8")
1144 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1145 n = p = 0
1146 while True:
1147 n = l + len(str(p))
1148 if n == p:
1149 break
1150 p = n
1151 records.append("%d %s=%s\n" % (p, keyword, value))
1152 records = "".join(records)
1153
1154 # We use a hardcoded "././@PaxHeader" name like star does
1155 # instead of the one that POSIX recommends.
1156 info = {}
1157 info["name"] = "././@PaxHeader"
1158 info["type"] = type
1159 info["size"] = len(records)
1160 info["magic"] = POSIX_MAGIC
1161
1162 # Create pax header + record blocks.
1163 return cls._create_header(info, USTAR_FORMAT) + \
1164 cls._create_payload(records)
1165
Guido van Rossum75b64e62005-01-16 00:16:11 +00001166 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001167 def frombuf(cls, buf):
1168 """Construct a TarInfo object from a 512 byte string buffer.
1169 """
Georg Brandl38c6a222006-05-10 16:26:03 +00001170 if len(buf) != BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +00001171 raise HeaderError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +00001172 if buf.count(NUL) == BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +00001173 raise HeaderError("empty header")
1174
Georg Brandlded1c4d2006-12-20 11:55:16 +00001175 chksum = nti(buf[148:156])
Georg Brandlebbeed72006-12-19 22:06:46 +00001176 if chksum not in calc_chksums(buf):
1177 raise HeaderError("bad checksum")
Georg Brandl38c6a222006-05-10 16:26:03 +00001178
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001179 obj = cls()
1180 obj.buf = buf
1181 obj.name = nts(buf[0:100])
1182 obj.mode = nti(buf[100:108])
1183 obj.uid = nti(buf[108:116])
1184 obj.gid = nti(buf[116:124])
1185 obj.size = nti(buf[124:136])
1186 obj.mtime = nti(buf[136:148])
1187 obj.chksum = chksum
1188 obj.type = buf[156:157]
1189 obj.linkname = nts(buf[157:257])
1190 obj.uname = nts(buf[265:297])
1191 obj.gname = nts(buf[297:329])
1192 obj.devmajor = nti(buf[329:337])
1193 obj.devminor = nti(buf[337:345])
1194 prefix = nts(buf[345:500])
Georg Brandl3354f282006-10-29 09:16:12 +00001195
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001196 # Old V7 tar format represents a directory as a regular
1197 # file with a trailing slash.
1198 if obj.type == AREGTYPE and obj.name.endswith("/"):
1199 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001200
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001201 # Remove redundant slashes from directories.
1202 if obj.isdir():
1203 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001204
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001205 # Reconstruct a ustar longname.
1206 if prefix and obj.type not in GNU_TYPES:
1207 obj.name = prefix + "/" + obj.name
1208 return obj
1209
1210 @classmethod
1211 def fromtarfile(cls, tarfile):
1212 """Return the next TarInfo object from TarFile object
1213 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001214 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001215 buf = tarfile.fileobj.read(BLOCKSIZE)
1216 if not buf:
1217 return
1218 obj = cls.frombuf(buf)
1219 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1220 return obj._proc_member(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001221
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001222 #--------------------------------------------------------------------------
1223 # The following are methods that are called depending on the type of a
1224 # member. The entry point is _proc_member() which can be overridden in a
1225 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1226 # implement the following
1227 # operations:
1228 # 1. Set self.offset_data to the position where the data blocks begin,
1229 # if there is data that follows.
1230 # 2. Set tarfile.offset to the position where the next member's header will
1231 # begin.
1232 # 3. Return self or another valid TarInfo object.
1233 def _proc_member(self, tarfile):
1234 """Choose the right processing method depending on
1235 the type and call it.
Georg Brandl3354f282006-10-29 09:16:12 +00001236 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001237 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1238 return self._proc_gnulong(tarfile)
1239 elif self.type == GNUTYPE_SPARSE:
1240 return self._proc_sparse(tarfile)
1241 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1242 return self._proc_pax(tarfile)
1243 else:
1244 return self._proc_builtin(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001245
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001246 def _proc_builtin(self, tarfile):
1247 """Process a builtin type or an unknown type which
1248 will be treated as a regular file.
1249 """
1250 self.offset_data = tarfile.fileobj.tell()
1251 offset = self.offset_data
1252 if self.isreg() or self.type not in SUPPORTED_TYPES:
1253 # Skip the following data blocks.
1254 offset += self._block(self.size)
1255 tarfile.offset = offset
Georg Brandl3354f282006-10-29 09:16:12 +00001256
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001257 # Patch the TarInfo object with saved extended
1258 # header information.
1259 for keyword, value in tarfile.pax_headers.iteritems():
1260 if keyword in PAX_FIELDS:
1261 setattr(self, keyword, value)
1262 self.pax_headers[keyword] = value
1263
1264 return self
1265
1266 def _proc_gnulong(self, tarfile):
1267 """Process the blocks that hold a GNU longname
1268 or longlink member.
1269 """
1270 buf = tarfile.fileobj.read(self._block(self.size))
1271
1272 # Fetch the next header and process it.
1273 b = tarfile.fileobj.read(BLOCKSIZE)
1274 t = self.frombuf(b)
1275 t.offset = self.offset
1276 next = t._proc_member(tarfile)
1277
1278 # Patch the TarInfo object from the next header with
1279 # the longname information.
1280 next.offset = self.offset
1281 if self.type == GNUTYPE_LONGNAME:
1282 next.name = buf.rstrip(NUL)
1283 elif self.type == GNUTYPE_LONGLINK:
1284 next.linkname = buf.rstrip(NUL)
1285
1286 return next
1287
1288 def _proc_sparse(self, tarfile):
1289 """Process a GNU sparse header plus extra headers.
1290 """
1291 buf = self.buf
1292 sp = _ringbuffer()
1293 pos = 386
1294 lastpos = 0L
1295 realpos = 0L
1296 # There are 4 possible sparse structs in the
1297 # first header.
1298 for i in xrange(4):
1299 try:
1300 offset = nti(buf[pos:pos + 12])
1301 numbytes = nti(buf[pos + 12:pos + 24])
1302 except ValueError:
1303 break
1304 if offset > lastpos:
1305 sp.append(_hole(lastpos, offset - lastpos))
1306 sp.append(_data(offset, numbytes, realpos))
1307 realpos += numbytes
1308 lastpos = offset + numbytes
1309 pos += 24
1310
1311 isextended = ord(buf[482])
1312 origsize = nti(buf[483:495])
1313
1314 # If the isextended flag is given,
1315 # there are extra headers to process.
1316 while isextended == 1:
1317 buf = tarfile.fileobj.read(BLOCKSIZE)
1318 pos = 0
1319 for i in xrange(21):
1320 try:
1321 offset = nti(buf[pos:pos + 12])
1322 numbytes = nti(buf[pos + 12:pos + 24])
1323 except ValueError:
1324 break
1325 if offset > lastpos:
1326 sp.append(_hole(lastpos, offset - lastpos))
1327 sp.append(_data(offset, numbytes, realpos))
1328 realpos += numbytes
1329 lastpos = offset + numbytes
1330 pos += 24
1331 isextended = ord(buf[504])
1332
1333 if lastpos < origsize:
1334 sp.append(_hole(lastpos, origsize - lastpos))
1335
1336 self.sparse = sp
1337
1338 self.offset_data = tarfile.fileobj.tell()
1339 tarfile.offset = self.offset_data + self._block(self.size)
1340 self.size = origsize
1341
1342 return self
1343
1344 def _proc_pax(self, tarfile):
1345 """Process an extended or global header as described in
1346 POSIX.1-2001.
1347 """
1348 # Read the header information.
1349 buf = tarfile.fileobj.read(self._block(self.size))
1350
1351 # A pax header stores supplemental information for either
1352 # the following file (extended) or all following files
1353 # (global).
1354 if self.type == XGLTYPE:
1355 pax_headers = tarfile.pax_headers
1356 else:
1357 pax_headers = tarfile.pax_headers.copy()
1358
1359 # Fields in POSIX.1-2001 that are numbers, all other fields
1360 # are treated as UTF-8 strings.
1361 type_mapping = {
1362 "atime": float,
1363 "ctime": float,
1364 "mtime": float,
1365 "uid": int,
1366 "gid": int,
1367 "size": int
1368 }
1369
1370 # Parse pax header information. A record looks like that:
1371 # "%d %s=%s\n" % (length, keyword, value). length is the size
1372 # of the complete record including the length field itself and
1373 # the newline.
1374 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1375 pos = 0
1376 while True:
1377 match = regex.match(buf, pos)
1378 if not match:
1379 break
1380
1381 length, keyword = match.groups()
1382 length = int(length)
1383 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1384
1385 keyword = keyword.decode("utf8")
1386 keyword = keyword.encode(tarfile.encoding)
1387
1388 value = value.decode("utf8")
1389 if keyword in type_mapping:
1390 try:
1391 value = type_mapping[keyword](value)
1392 except ValueError:
1393 value = 0
1394 else:
1395 value = value.encode(tarfile.encoding)
1396
1397 pax_headers[keyword] = value
1398 pos += length
1399
1400 # Fetch the next header that will be patched with the
1401 # supplement information from the pax header (extended
1402 # only).
1403 t = self.fromtarfile(tarfile)
1404
1405 if self.type != XGLTYPE and t is not None:
1406 # Patch the TarInfo object from the next header with
1407 # the pax header's information.
1408 for keyword, value in pax_headers.items():
1409 if keyword in PAX_FIELDS:
1410 setattr(t, keyword, value)
1411 pax_headers[keyword] = value
1412 t.pax_headers = pax_headers.copy()
1413
1414 return t
1415
1416 def _block(self, count):
1417 """Round up a byte count by BLOCKSIZE and return it,
1418 e.g. _block(834) => 1024.
1419 """
1420 blocks, remainder = divmod(count, BLOCKSIZE)
1421 if remainder:
1422 blocks += 1
1423 return blocks * BLOCKSIZE
Georg Brandl3354f282006-10-29 09:16:12 +00001424
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001425 def isreg(self):
1426 return self.type in REGULAR_TYPES
1427 def isfile(self):
1428 return self.isreg()
1429 def isdir(self):
1430 return self.type == DIRTYPE
1431 def issym(self):
1432 return self.type == SYMTYPE
1433 def islnk(self):
1434 return self.type == LNKTYPE
1435 def ischr(self):
1436 return self.type == CHRTYPE
1437 def isblk(self):
1438 return self.type == BLKTYPE
1439 def isfifo(self):
1440 return self.type == FIFOTYPE
1441 def issparse(self):
1442 return self.type == GNUTYPE_SPARSE
1443 def isdev(self):
1444 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1445# class TarInfo
1446
1447class TarFile(object):
1448 """The TarFile Class provides an interface to tar archives.
1449 """
1450
1451 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1452
1453 dereference = False # If true, add content of linked file to the
1454 # tar file, else the link.
1455
1456 ignore_zeros = False # If true, skips empty or invalid blocks and
1457 # continues processing.
1458
1459 errorlevel = 0 # If 0, fatal errors only appear in debug
1460 # messages (if debug >= 0). If > 0, errors
1461 # are passed to the caller as exceptions.
1462
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001463 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001464
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001465 encoding = ENCODING # Transfer UTF-8 strings from POSIX.1-2001
1466 # headers to this encoding.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001467
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001468 tarinfo = TarInfo # The default TarInfo class to use.
1469
1470 fileobject = ExFileObject # The default ExFileObject class to use.
1471
1472 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1473 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1474 pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001475 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1476 read from an existing archive, 'a' to append data to an existing
1477 file or 'w' to create a new file overwriting an existing one. `mode'
1478 defaults to 'r'.
1479 If `fileobj' is given, it is used for reading or writing data. If it
1480 can be determined, `mode' is overridden by `fileobj's mode.
1481 `fileobj' is not closed, when TarFile is closed.
1482 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001483 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001484 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001485 self.mode = mode
1486 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001487
1488 if not fileobj:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001489 if self.mode == "a" and not os.path.exists(name):
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001490 # Create nonexistent files in append mode.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001491 self.mode = "w"
1492 self._mode = "wb"
1493 fileobj = file(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001494 self._extfileobj = False
1495 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001496 if name is None and hasattr(fileobj, "name"):
1497 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001498 if hasattr(fileobj, "mode"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001499 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001500 self._extfileobj = True
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001501 self.name = os.path.abspath(name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001502 self.fileobj = fileobj
1503
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001504 # Init attributes.
1505 if format is not None:
1506 self.format = format
1507 if tarinfo is not None:
1508 self.tarinfo = tarinfo
1509 if dereference is not None:
1510 self.dereference = dereference
1511 if ignore_zeros is not None:
1512 self.ignore_zeros = ignore_zeros
1513 if encoding is not None:
1514 self.encoding = encoding
1515 if debug is not None:
1516 self.debug = debug
1517 if errorlevel is not None:
1518 self.errorlevel = errorlevel
1519
1520 # Init datastructures.
Georg Brandl38c6a222006-05-10 16:26:03 +00001521 self.closed = False
1522 self.members = [] # list of members as TarInfo objects
1523 self._loaded = False # flag if all members have been read
1524 self.offset = 0L # current position in the archive file
1525 self.inodes = {} # dictionary caching the inodes of
1526 # archive members already added
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001527 self.pax_headers = {} # save contents of global pax headers
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001528
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001529 if self.mode == "r":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001530 self.firstmember = None
1531 self.firstmember = self.next()
1532
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001533 if self.mode == "a":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001534 # Move to the end of the archive,
1535 # before the first empty block.
1536 self.firstmember = None
1537 while True:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001538 if self.next() is None:
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001539 if self.offset > 0:
1540 self.fileobj.seek(- BLOCKSIZE, 1)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001541 break
1542
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001543 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001544 self._loaded = True
1545
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001546 if pax_headers:
1547 buf = self.tarinfo.create_pax_global_header(
1548 pax_headers.copy(), self.encoding)
1549 self.fileobj.write(buf)
1550 self.offset += len(buf)
1551
# Deprecated boolean view on the format attribute: True means
# USTAR_FORMAT, assigning a false value selects GNU_FORMAT.
def _getposix(self):
    return self.format == USTAR_FORMAT
def _setposix(self, value):
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning)
    self.format = USTAR_FORMAT if value else GNU_FORMAT
posix = property(_getposix, _setposix)
1562
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001563 #--------------------------------------------------------------------------
1564 # Below are the classmethods which act as alternate constructors to the
1565 # TarFile class. The open() method is the only one that is needed for
1566 # public use; it is the "super"-constructor and is able to select an
1567 # adequate "sub"-constructor for a particular compression using the mapping
1568 # from OPEN_METH.
1569 #
1570 # This concept allows one to subclass TarFile without losing the comfort of
1571 # the super-constructor. A sub-constructor is registered and made available
1572 # by adding it to the mapping in OPEN_METH.
1573
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Additional keyword arguments are passed on to the selected
       constructor. Raises ValueError if neither name nor fileobj is
       given or the mode is not understood, CompressionError for an
       unknown compression type and ReadError if no registered open
       method is able to read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so that the next candidate starts at the
                # same position.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Tuple membership: a substring test would let "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001646
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       mode must be 'r', 'a' or 'w', otherwise ValueError is raised.
       Additional keyword arguments are passed on to the constructor.
    """
    # Tuple membership instead of a substring test, which would
    # accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001654
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for an invalid mode, CompressionError if the
       gzip module cannot be used and ReadError if an IOError occurs
       while the archive is opened.
    """
    # Tuple membership instead of a substring test, which would
    # accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the raw file is opened here, so that it can be
    # closed again if the archive cannot be opened.
    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj),
            **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure is passed on unchanged, but must not
        # leak the file either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1680
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for an invalid mode, CompressionError if the
       bz2 module is not available and ReadError if an IOError occurs
       while the archive is opened.
    """
    # Tuple membership instead of a substring test, which would
    # accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here is closed on failure; a file object
    # handed in by the caller stays under the caller's control.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure is passed on unchanged, but must not
        # leak the file either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1705
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001706 # All *open() methods are registered here.
1707 OPEN_METH = {
1708 "tar": "taropen", # uncompressed tar
1709 "gz": "gzopen", # gzip compressed tar
1710 "bz2": "bz2open" # bzip2 compressed tar
1711 }
1712
1713 #--------------------------------------------------------------------------
1714 # The public methods which TarFile provides:
1715
def close(self):
    """Finish and close the archive. When the TarFile was opened for
       writing or appending, two zero blocks are written as end marker
       and the output is padded with zeros to a multiple of RECORDSIZE.
       Closing an already closed TarFile does nothing.
    """
    if not self.closed:
        if self.mode in "aw":
            end_marker_size = BLOCKSIZE * 2
            self.fileobj.write(NUL * end_marker_size)
            self.offset += end_marker_size

            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            overhang = self.offset % RECORDSIZE
            if overhang > 0:
                self.fileobj.write(NUL * (RECORDSIZE - overhang))

        # A file object that was passed in by the user stays open.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1735
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001746
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete member list is only available once the whole
    # archive has been scanned.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1756
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    members = self.getmembers()
    return [member.name for member in members]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001762
1763 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1764 """Create a TarInfo object for either the file `name' or the file
1765 object `fileobj' (using os.fstat on its file descriptor). You can
1766 modify some of the TarInfo's attributes before you add it using
1767 addfile(). If given, `arcname' specifies an alternative name for the
1768 file in the archive.
1769 """
1770 self._check("aw")
1771
1772 # When fileobj is given, replace name by
1773 # fileobj's real name.
1774 if fileobj is not None:
1775 name = fileobj.name
1776
1777 # Building the name of the member in the archive.
1778 # Backward slashes are converted to forward slashes,
1779 # Absolute paths are turned to relative paths.
1780 if arcname is None:
1781 arcname = name
1782 arcname = normpath(arcname)
1783 drv, arcname = os.path.splitdrive(arcname)
1784 while arcname[0:1] == "/":
1785 arcname = arcname[1:]
1786
1787 # Now, fill the TarInfo object with
1788 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001789 tarinfo = self.tarinfo()
1790 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001791
1792 # Use os.stat or os.lstat, depending on platform
1793 # and if symlinks shall be resolved.
1794 if fileobj is None:
1795 if hasattr(os, "lstat") and not self.dereference:
1796 statres = os.lstat(name)
1797 else:
1798 statres = os.stat(name)
1799 else:
1800 statres = os.fstat(fileobj.fileno())
1801 linkname = ""
1802
1803 stmd = statres.st_mode
1804 if stat.S_ISREG(stmd):
1805 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001806 if not self.dereference and statres.st_nlink > 1 and \
1807 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001808 # Is it a hardlink to an already
1809 # archived file?
1810 type = LNKTYPE
1811 linkname = self.inodes[inode]
1812 else:
1813 # The inode is added only if its valid.
1814 # For win32 it is always 0.
1815 type = REGTYPE
1816 if inode[0]:
1817 self.inodes[inode] = arcname
1818 elif stat.S_ISDIR(stmd):
1819 type = DIRTYPE
1820 if arcname[-1:] != "/":
1821 arcname += "/"
1822 elif stat.S_ISFIFO(stmd):
1823 type = FIFOTYPE
1824 elif stat.S_ISLNK(stmd):
1825 type = SYMTYPE
1826 linkname = os.readlink(name)
1827 elif stat.S_ISCHR(stmd):
1828 type = CHRTYPE
1829 elif stat.S_ISBLK(stmd):
1830 type = BLKTYPE
1831 else:
1832 return None
1833
1834 # Fill the TarInfo object with all
1835 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001836 tarinfo.name = arcname
1837 tarinfo.mode = stmd
1838 tarinfo.uid = statres.st_uid
1839 tarinfo.gid = statres.st_gid
1840 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001841 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001842 else:
1843 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001844 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001845 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001846 tarinfo.linkname = linkname
1847 if pwd:
1848 try:
1849 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1850 except KeyError:
1851 pass
1852 if grp:
1853 try:
1854 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1855 except KeyError:
1856 pass
1857
1858 if type in (CHRTYPE, BLKTYPE):
1859 if hasattr(os, "major") and hasattr(os, "minor"):
1860 tarinfo.devmajor = os.major(statres.st_rdev)
1861 tarinfo.devminor = os.minor(statres.st_rdev)
1862 return tarinfo
1863
1864 def list(self, verbose=True):
1865 """Print a table of contents to sys.stdout. If `verbose' is False, only
1866 the names of the members are printed. If it is True, an `ls -l'-like
1867 output is produced.
1868 """
1869 self._check()
1870
1871 for tarinfo in self:
1872 if verbose:
1873 print filemode(tarinfo.mode),
1874 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1875 tarinfo.gname or tarinfo.gid),
1876 if tarinfo.ischr() or tarinfo.isblk():
1877 print "%10s" % ("%d,%d" \
1878 % (tarinfo.devmajor, tarinfo.devminor)),
1879 else:
1880 print "%10d" % tarinfo.size,
1881 print "%d-%02d-%02d %02d:%02d:%02d" \
1882 % time.localtime(tarinfo.mtime)[:6],
1883
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001884 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001885
1886 if verbose:
1887 if tarinfo.issym():
1888 print "->", tarinfo.linkname,
1889 if tarinfo.islnk():
1890 print "link to", tarinfo.linkname,
1891 print
1892
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       Files of an unsupported type and the archive file itself are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f))
            return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # file() rather than open(): this module rebinds the name
        # `open' to TarFile.open.
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Do not leak the handle if addfile() fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1943
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a shallow copy; it is the copy that ends up in
    # self.members.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last
    # block with NULs up to a full BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        blocks, remainder = divmod(member.size, BLOCKSIZE)
        if remainder > 0:
            padding = BLOCKSIZE - remainder
            self.fileobj.write(NUL * padding)
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001969
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while updating a directory is re-raised
       if self.errorlevel > 1, otherwise it is only reported via _dbg().
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode (0700), so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), stat.S_IRWXU)
            except EnvironmentError:
                # Best effort, e.g. the directory exists already.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Keep `path' untouched: every directory has to be resolved
        # against the extraction root, not against the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
2010
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002011 def extract(self, member, path=""):
2012 """Extract a member from the archive to the current working directory,
2013 using its full name. Its file information is extracted as accurately
2014 as possible. `member' may be a filename or a TarInfo object. You can
2015 specify a different directory using `path'.
2016 """
2017 self._check("r")
2018
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002019 if isinstance(member, basestring):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002020 tarinfo = self.getmember(member)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002021 else:
2022 tarinfo = member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002023
Neal Norwitza4f651a2004-07-20 22:07:44 +00002024 # Prepare the link target for makelink().
2025 if tarinfo.islnk():
2026 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2027
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002028 try:
2029 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2030 except EnvironmentError, e:
2031 if self.errorlevel > 0:
2032 raise
2033 else:
2034 if e.filename is None:
2035 self._dbg(1, "tarfile: %s" % e.strerror)
2036 else:
2037 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2038 except ExtractError, e:
2039 if self.errorlevel > 1:
2040 raise
2041 else:
2042 self._dbg(1, "tarfile: %s" % e)
2043
2044 def extractfile(self, member):
2045 """Extract a member from the archive as a file object. `member' may be
2046 a filename or a TarInfo object. If `member' is a regular file, a
2047 file-like object is returned. If `member' is a link, a file-like
2048 object is constructed from the link's target. If `member' is none of
2049 the above, None is returned.
2050 The file-like object is read-only and provides the following
2051 methods: read(), readline(), readlines(), seek() and tell()
2052 """
2053 self._check("r")
2054
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002055 if isinstance(member, basestring):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002056 tarinfo = self.getmember(member)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002057 else:
2058 tarinfo = member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002059
2060 if tarinfo.isreg():
2061 return self.fileobject(self, tarinfo)
2062
2063 elif tarinfo.type not in SUPPORTED_TYPES:
2064 # If a member's type is unknown, it is treated as a
2065 # regular file.
2066 return self.fileobject(self, tarinfo)
2067
2068 elif tarinfo.islnk() or tarinfo.issym():
2069 if isinstance(self.fileobj, _Stream):
2070 # A small but ugly workaround for the case that someone tries
2071 # to extract a (sym)link as a file-object from a non-seekable
2072 # stream of tar blocks.
Georg Brandle4751e32006-05-18 06:11:19 +00002073 raise StreamError("cannot extract (sym)link as file object")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002074 else:
Georg Brandl7eb4b7d2005-07-22 21:49:32 +00002075 # A (sym)link's file object is its target's file object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002076 return self.extractfile(self._getmember(tarinfo.linkname,
2077 tarinfo))
2078 else:
2079 # If there's no data associated with the member (directory, chrdev,
2080 # blkdev, etc.), return None instead of a file object.
2081 return None
2082
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.
    """
    # Build the destination pathname: drop one trailing slash and
    # let os.path.normpath() produce the platform specific form.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member's type;
    # anything left over is written as a regular file.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if tarinfo.issym():
        return
    self.chmod(tarinfo, targetpath)
    self.utime(tarinfo, targetpath)
2123
2124 #--------------------------------------------------------------------------
2125 # Below are the different file methods. They are called via
2126 # _extract_member() when extract() is called. They can be replaced in a
2127 # subclass to implement other functionality.
2128
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath. An already existing
       targetpath is not treated as an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only EEXIST is tolerated, everything else is passed on.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
2137
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member `tarinfo', as delivered by extractfile().

       Both the source and the target file object are closed, even if
       opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() rather than open(): this module rebinds the name
        # `open' to TarFile.open.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2146
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is written like a regular file and
       a debug message is emitted.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
2154
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised if the
       platform has no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002162
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
       ExtractError is raised if the platform lacks os.mknod() or
       os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode bits with the device type flag.
    mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk()
                           else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2177
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # No link could be made (presumably os.symlink()/os.link()
        # do not exist on this platform): fall back to a copy.
        if tarinfo.issym():
            linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                             linkpath))

        try:
            # First choice: extract the referenced member again.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the file from the file system.
            try:
                shutil.copy2(os.path.normpath(linkpath), targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002204
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is done
       unless the process runs with effective uid 0. ExtractError is
       raised if the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then our own gid.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Resolve the user the same way.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002232
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
       ExtractError is raised if the mode cannot be changed; nothing
       is done on platforms without os.chmod().
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002241
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.
       The access time is set to the same value. ExtractError is
       raised if the times cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    if sys.platform == "win32" and tarinfo.isdir():
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        return
    times = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, times)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002255
2256 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.
    """
    self._check("ra")

    # A pending first member is handed out before anything else
    # is read.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
            if tarinfo is None:
                return
            self.members.append(tarinfo)
        except HeaderError:
            error = sys.exc_info()[1]
            if not self.ignore_zeros:
                # A bad header at the very beginning is an error,
                # anywhere else it is taken as the end of the archive.
                if self.offset == 0:
                    raise ReadError(str(error))
                return None
            # Skip the bad block and try the following one.
            self._dbg(2, "0x%X: %s" % (self.offset, error))
            self.offset += BLOCKSIZE
            continue
        return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002289
2290 #--------------------------------------------------------------------------
2291 # Little helper methods:
2292
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.
       If tarinfo is given, it is used as the starting point, i.e.
       only members in front of it are considered. None is returned
       if there is no match.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is not None:
        # Restrict the search to everything before `tarinfo'.
        members = members[:members.index(tarinfo)]

    for candidate in reversed(members):
        if name == candidate.name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002308
def _load(self):
    """Read through the entire archive file and look for readable
       members. Afterwards the TarFile is flagged as _loaded.
    """
    # next() does the real work; simply drain it until it
    # reports the end of the archive with None.
    while self.next() is not None:
        pass
    self._loaded = True
2318
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode. IOError is raised otherwise.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        # No particular mode is required.
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002327
def __iter__(self):
    """Provide an iterator object: a TarIter that reads members on
       demand, or a plain iterator over self.members once the archive
       is fully loaded.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2335
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr. `msg' is only printed
       if `level' does not exceed self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2341# class TarFile
2342
2343class TarIter:
2344 """Iterator Class.
2345
2346 for tarinfo in TarFile(...):
2347 suite...
2348 """
2349
2350 def __init__(self, tarfile):
2351 """Construct a TarIter object.
2352 """
2353 self.tarfile = tarfile
Martin v. Löwis637431b2005-03-03 23:12:42 +00002354 self.index = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002355 def __iter__(self):
2356 """Return iterator object.
2357 """
2358 return self
2359 def next(self):
2360 """Return the next item using TarFile's next() method.
2361 When all members have been read, set TarFile as _loaded.
2362 """
Martin v. Löwis637431b2005-03-03 23:12:42 +00002363 # Fix for SF #1100429: Under rare circumstances it can
2364 # happen that getmembers() is called during iteration,
2365 # which will cause TarIter to stop prematurely.
2366 if not self.tarfile._loaded:
2367 tarinfo = self.tarfile.next()
2368 if not tarinfo:
2369 self.tarfile._loaded = True
2370 raise StopIteration
2371 else:
2372 try:
2373 tarinfo = self.tarfile.members[self.index]
2374 except IndexError:
2375 raise StopIteration
2376 self.index += 1
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002377 return tarinfo
2378
2379# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole. A section covers `size' bytes
       starting at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True if `offset' lies within [self.offset, self.offset + size).
        start = self.offset
        return start <= offset < start + self.size
2388
class _data(_section):
    """Represent a data section in a sparse file. In addition to
       offset and size it stores a `realpos' value.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2395
class _hole(_section):
    """Represent a hole section in a sparse file. It adds nothing
       to _section and only serves to tell holes from data.
    """
2400
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.
    """
    def __init__(self):
        # Position of the item that matched most recently.
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains `offset', searching
           circularly from the last match. None is returned after
           one full round without a match.
        """
        start = self.idx
        pos = start
        while True:
            item = self[pos]
            if offset in item:
                # Remember the hit as the next starting point.
                self.idx = pos
                return item
            pos += 1
            if pos == len(self):
                pos = 0
            if pos == start:
                # End of File
                return None
2421
2422#---------------------------------------------
2423# zipfile compatible TarFile class
2424#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED

class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' as a plain (TAR_PLAIN) or gzip compressed
           (TAR_GZIPPED) archive. Any other `compression' value
           raises ValueError.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names that zipfile
            # uses for the same information.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of all members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents using TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called `name'."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called `name'."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive; `compress_type'
           is accepted but ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member described by `zinfo',
           a TarInfo object that carries zipfile-style attributes.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Translate the zipfile-style attributes back to the
        # names that addfile() works with.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
2472
2473#--------------------
2474# exported functions
2475#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # Opening (and closing) the archive is the whole check; only a
    # TarError counts as "not a tar archive", other exceptions
    # are passed on to the caller.
    try:
        open(name).close()
    except TarError:
        return False
    return True
2486
2487open = TarFile.open