blob: ccbfdde6edeff827e0dc1a93b68d23025fffb763 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
Jack Jansencfc49022003-03-07 13:37:32 +000054if sys.platform == 'mac':
55 # This module needs work for MacOS9, especially in the area of pathname
56 # handling. In many places it is assumed a simple substitution of / by the
57 # local os.path.sep is good enough to convert pathnames, but this does not
58 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
59 raise ImportError, "tarfile does not work for platform==mac"
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = "\x00"                    # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE     # length of records (20 blocks)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1    # maximum size of a file: 077777777777
                                # octal, i.e. 11 octal digits

# Values of the one-character type flag in a header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\x00"               # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE,
    AREGTYPE,
    LNKTYPE,
    SYMTYPE,
    DIRTYPE,
    FIFOTYPE,
    CONTTYPE,
    CHRTYPE,
    BLKTYPE,
    GNUTYPE_LONGNAME,
    GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE,
    AREGTYPE,
    CONTTYPE,
    GNUTYPE_SPARSE,
)
108
109#---------------------------------------------------------
110# Bits used in the mode field, values in octal.
111#---------------------------------------------------------
# File-type bits (literals in hex; the octal value is given in each comment).
S_IFLNK = 0xA000        # 0120000  symbolic link
S_IFREG = 0x8000        # 0100000  regular file
S_IFBLK = 0x6000        # 0060000  block device
S_IFDIR = 0x4000        # 0040000  directory
S_IFCHR = 0x2000        # 0020000  character device
S_IFIFO = 0x1000        # 0010000  fifo

# Special bits.
TSUID = 0x800           # 04000  set UID on execution
TSGID = 0x400           # 02000  set GID on execution
TSVTX = 0x200           # 01000  reserved

# Permission bits.
TUREAD = 0x100          # 0400  read by owner
TUWRITE = 0x080         # 0200  write by owner
TUEXEC = 0x040          # 0100  execute/search by owner
TGREAD = 0x020          # 0040  read by group
TGWRITE = 0x010         # 0020  write by group
TGEXEC = 0x008          # 0010  execute/search by group
TOREAD = 0x004          # 0004  read by other
TOWRITE = 0x002         # 0002  write by other
TOEXEC = 0x001          # 0001  execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Return s as a fixed-width field of `length' characters.

    Longer strings are cut off at `length'; shorter ones are padded
    on the right with NUL characters.
    """
    field = s[:length]
    return field.ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nti(s):
    """Decode a header number field into a python number.

    A field starting with the byte 0x80 (0200 octal) holds a big-endian
    binary number in the remaining bytes; any other field is an octal
    string with trailing NULs and spaces (see itn()). An octal field
    that cannot be parsed raises HeaderError.
    """
    if s[0] == chr(0x80):
        value = 0
        for char in s[1:]:
            value = (value << 8) + ord(char)
        return value

    digits = s.rstrip(NUL + " ")
    try:
        # A field that is empty after stripping counts as zero.
        return int(digits or "0", 8)
    except ValueError:
        raise HeaderError("invalid header")
158
def itn(n, digits=8, posix=False):
    """Encode a python number as a header number field of `digits' bytes.

    POSIX 1003.1-1988 requires a string of octal digits followed by a
    null-byte, which allows values up to (8**(digits-1))-1. For values
    outside that range GNU tar stores a leading 0x80 (0200 octal) byte
    followed by digits-1 bytes of big-endian binary, which allows
    values up to (256**(digits-1))-1. With posix=True such values
    raise ValueError instead.
    """
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, then flip them so the
    # field ends up big-endian.
    payload = []
    for _ in range(width):
        payload.append(chr(n & 0xFF))
        n >>= 8
    payload.reverse()
    return chr(0x80) + "".join(payload)
185
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a header block.

    All bytes of the first 512 are summed except the 8-byte chksum
    field at offset 148, which is counted as if it were filled with
    spaces (8 * 32 = 256). According to the GNU tar sources, some tars
    (Sun and NeXT) sum signed chars, which gives a different result
    when bytes with the high bit set are present, so both variants
    are computed.
    """
    before = buf[:148]
    after = buf[156:512]
    unsigned_chksum = 256 + sum(struct.unpack("148B", before)) \
                          + sum(struct.unpack("356B", after))
    signed_chksum = 256 + sum(struct.unpack("148b", before)) \
                        + sum(struct.unpack("356b", after))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000198
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None the whole remaining content is copied using
    shutil.copyfileobj(); with length 0 nothing is done. Otherwise the
    data is moved in 16 KiB pieces and IOError is raised as soon as
    src delivers fewer bytes than requested.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    done = 0
    while done < full_chunks:
        data = src.read(BUFSIZE)
        if len(data) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(data)
        done += 1

    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
223
# One inner tuple per character of the mode string produced by
# filemode(). Each inner tuple lists (bits, character) pairs in the
# order they are tried; the first pair whose bits are all set wins.
filemode_table = (
    # file type
    (
        (S_IFLNK, "l"),
        (S_IFREG, "-"),
        (S_IFBLK, "b"),
        (S_IFDIR, "d"),
        (S_IFCHR, "c"),
        (S_IFIFO, "p"),
    ),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    (
        (TUEXEC | TSUID, "s"),
        (TSUID, "S"),
        (TUEXEC, "x"),
    ),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    (
        (TGEXEC | TSGID, "s"),
        (TSGID, "S"),
        (TGEXEC, "x"),
    ),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    (
        (TOEXEC | TSVTX, "t"),
        (TSVTX, "T"),
        (TOEXEC, "x"),
    ),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000250
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for choices in filemode_table:
        # "-" is used when none of the candidates for this position
        # has all of its bits set in mode.
        symbol = "-"
        for mask, flag in choices:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
270
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000289
290#---------------------------
291# internal stream interface
292#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.

       The file is accessed through an os-level file descriptor
       (os.open/os.read/os.write/os.close), stored in self.fd.
    """

    # Permission bits passed to os.open() when a file is created:
    # 0666 octal (rw-rw-rw-), still subject to the process umask.
    # Without it os.open() falls back to its 0777 default, which
    # creates executable archive files.
    _CREATE_MODE = 0x1B6

    def __init__(self, name, mode):
        """Open the file `name'. `mode' must be "r" (read only) or
           "w" (write only, create, truncate); any other value raises
           KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            # Only defined on some platforms; request binary mode there.
            flags |= os.O_BINARY
        self.fd = os.open(name, flags, self._CREATE_MODE)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return the result of a single os.read() of up to size bytes."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write s to the file descriptor with a single os.write()."""
        os.write(self.fd, s)
315 os.write(self.fd, s)
316
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened via _LowLevelFile when no
                       fileobj is given, and written into the gzip
                       header when writing gzip data.
           mode     -- "r" selects the reading side; the write paths
                       check for "w".
           comptype -- "tar" (no compression), "gz", "bz2", or "*" to
                       detect the type from the data when reading.
           fileobj  -- stream-like object to use, or None.
           bufsize  -- size of the blocks exchanged with fileobj.

           Raises CompressionError if the module needed for comptype
           cannot be imported.
        """
        # Remember whether the file object belongs to the caller; only
        # a file object opened here is closed by close().
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""           # raw data not yet consumed / written
        self.pos = 0            # position in the uncompressed stream
        self.closed = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and
        # trailer are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        # magic, deflate method, FNAME flag, mtime, extra flags, OS byte
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long.  So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Checks the magic number and compression method and skips
           the rest of the gzip header. Raises ReadError if the magic
           number is wrong and CompressionError for a method other
           than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)          # mtime, extra flags and OS byte

        if flag & 4:
            # Extra field: part of the uncompressed gzip header, so it
            # has to be skipped with the raw __read(). Going through
            # read() would feed it to the decompressor and advance
            # self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # original file name, NUL terminated
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # comment, NUL terminated
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # header crc
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raises StreamError if pos lies before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            skipped = 0
            while skipped < blocks:
                self.read(self.bufsize)
                skipped += 1
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           For compressed streams the data is decompressed first;
           decompressed data beyond size is kept in self.dbuf for the
           next call.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
540
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first BLOCKSIZE bytes, which
           getcomptype() inspects.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the data read ahead by __init__, regardless of size.

           The first call rebinds self.read to fileobj.read, so every
           later read goes straight to the wrapped file object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on how the data
           read ahead starts.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit for the block
        # size (compression level) and then the block magic "1AY&SY".
        # The digit is skipped so that every level is recognized.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
564
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024       # bytes requested from fileobj per read

    def __init__(self, fileobj, mode):
        """Wrap fileobj; mode "r" decompresses, any other mode
           compresses.
        """
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)set the position and the bz2 (de)compressor. For
           reading, fileobj is also rewound to its start.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of decompressed data; fewer are
           returned only when the data runs out.
        """
        chunks = [self.buf]
        have = len(self.buf)
        while have < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # fileobj is exhausted (e.g. a truncated archive).
                # Decompressing an empty string neither raises EOFError
                # nor yields data, so without this check the loop
                # would never end.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # the end of the bz2 stream was already reached
                break
            chunks.append(data)
            have += len(data)
        self.buf = "".join(chunks)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to position pos of the decompressed data. Going
           backwards restarts decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current position in the uncompressed data."""
        return self.pos

    def write(self, data):
        """Compress data and write the result to fileobj."""
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush pending compressed data in "w" mode and close fileobj."""
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
627
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000628#------------------------
629# Extraction file object
630#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up reading of the member described by tarinfo from
           tarfile.fileobj.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the data in fileobj
        self.size = tarinfo.size
        self.pos = 0                        # position within the member
        self.linebuffer = ""                # data read ahead by readline()
        # read() is bound per instance to the matching implementation.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Carriage returns directly in front of the newline are
           removed from the returned line.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl > size:
            # The buffered line is longer than requested: hand out the
            # first size characters only and keep the rest, newline
            # included, for the next call. (Cutting the buffer at nl
            # here would lose a character and append a newline that is
            # not in the data.)
            line = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return line
        if nl < 0:
            size -= len(self.linebuffer)
            while (nl < 0 and size > 0):
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                # no newline before EOF or before size ran out
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Returns at most size bytes, or everything up to the end of
           the member if size is None. Raises ValueError if the object
           is closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           The result is assembled section by section. Raises
           ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        # _data is defined elsewhere in this module; sections of that
        # type are read from fileobj, all others are returned as NULs.
        if isinstance(section, _data):
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (from the start), 1 (relative to the current
           position) or 2 (relative to the end). The result is kept
           within 0..size and the readline() buffer is dropped.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
787
788#------------------
789# Exported Classes
790#------------------
791class TarInfo(object):
792 """Informational class which holds the details about an
793 archive member given by a tar header block.
794 TarInfo objects are returned by TarFile.getmember(),
795 TarFile.getmembers() and TarFile.gettarinfo() and are
796 usually created internally.
797 """
798
799 def __init__(self, name=""):
800 """Construct a TarInfo object. name is the optional name
801 of the member.
802 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000803 self.name = name # member name (dirnames must end with '/')
804 self.mode = 0666 # file permissions
805 self.uid = 0 # user id
806 self.gid = 0 # group id
807 self.size = 0 # file size
808 self.mtime = 0 # modification time
809 self.chksum = 0 # header checksum
810 self.type = REGTYPE # member type
811 self.linkname = "" # link name
812 self.uname = "user" # user name
813 self.gname = "group" # group name
814 self.devmajor = 0 # device major number
815 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000816
Georg Brandl38c6a222006-05-10 16:26:03 +0000817 self.offset = 0 # the tar header starts here
818 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000819
820 def __repr__(self):
821 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
822
Guido van Rossum75b64e62005-01-16 00:16:11 +0000823 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000824 def frombuf(cls, buf):
825 """Construct a TarInfo object from a 512 byte string buffer.
826 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000827 if len(buf) != BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +0000828 raise HeaderError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000829 if buf.count(NUL) == BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +0000830 raise HeaderError("empty header")
831
Georg Brandlded1c4d2006-12-20 11:55:16 +0000832 chksum = nti(buf[148:156])
Georg Brandlebbeed72006-12-19 22:06:46 +0000833 if chksum not in calc_chksums(buf):
834 raise HeaderError("bad checksum")
Georg Brandl38c6a222006-05-10 16:26:03 +0000835
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000836 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000837 tarinfo.buf = buf
Georg Brandle8953182006-05-27 14:02:03 +0000838 tarinfo.name = buf[0:100].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000839 tarinfo.mode = nti(buf[100:108])
840 tarinfo.uid = nti(buf[108:116])
841 tarinfo.gid = nti(buf[116:124])
842 tarinfo.size = nti(buf[124:136])
843 tarinfo.mtime = nti(buf[136:148])
Georg Brandlebbeed72006-12-19 22:06:46 +0000844 tarinfo.chksum = chksum
Georg Brandl38c6a222006-05-10 16:26:03 +0000845 tarinfo.type = buf[156:157]
Georg Brandle8953182006-05-27 14:02:03 +0000846 tarinfo.linkname = buf[157:257].rstrip(NUL)
847 tarinfo.uname = buf[265:297].rstrip(NUL)
848 tarinfo.gname = buf[297:329].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000849 tarinfo.devmajor = nti(buf[329:337])
850 tarinfo.devminor = nti(buf[337:345])
Georg Brandl3354f282006-10-29 09:16:12 +0000851 prefix = buf[345:500].rstrip(NUL)
852
853 if prefix and not tarinfo.issparse():
854 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000855
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000856 return tarinfo
857
Georg Brandl38c6a222006-05-10 16:26:03 +0000858 def tobuf(self, posix=False):
Georg Brandl3354f282006-10-29 09:16:12 +0000859 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000860 """
Georg Brandl3354f282006-10-29 09:16:12 +0000861 buf = ""
862 type = self.type
863 prefix = ""
864
865 if self.name.endswith("/"):
866 type = DIRTYPE
867
Georg Brandl87fa5592006-12-06 22:21:18 +0000868 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
869 # Prevent "././@LongLink" from being normalized.
870 name = self.name
871 else:
872 name = normpath(self.name)
Georg Brandl3354f282006-10-29 09:16:12 +0000873
874 if type == DIRTYPE:
875 # directories should end with '/'
876 name += "/"
877
878 linkname = self.linkname
879 if linkname:
880 # if linkname is empty we end up with a '.'
881 linkname = normpath(linkname)
882
883 if posix:
884 if self.size > MAXSIZE_MEMBER:
885 raise ValueError("file is too large (>= 8 GB)")
886
887 if len(self.linkname) > LENGTH_LINK:
888 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
889
890 if len(name) > LENGTH_NAME:
891 prefix = name[:LENGTH_PREFIX + 1]
892 while prefix and prefix[-1] != "/":
893 prefix = prefix[:-1]
894
895 name = name[len(prefix):]
896 prefix = prefix[:-1]
897
898 if not prefix or len(name) > LENGTH_NAME:
899 raise ValueError("name is too long")
900
901 else:
902 if len(self.linkname) > LENGTH_LINK:
903 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
904
905 if len(name) > LENGTH_NAME:
906 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
907
Georg Brandl38c6a222006-05-10 16:26:03 +0000908 parts = [
Georg Brandl3354f282006-10-29 09:16:12 +0000909 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000910 itn(self.mode & 07777, 8, posix),
911 itn(self.uid, 8, posix),
912 itn(self.gid, 8, posix),
913 itn(self.size, 12, posix),
914 itn(self.mtime, 12, posix),
915 " ", # checksum field
Georg Brandl3354f282006-10-29 09:16:12 +0000916 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000917 stn(self.linkname, 100),
918 stn(MAGIC, 6),
919 stn(VERSION, 2),
920 stn(self.uname, 32),
921 stn(self.gname, 32),
922 itn(self.devmajor, 8, posix),
923 itn(self.devminor, 8, posix),
Georg Brandl3354f282006-10-29 09:16:12 +0000924 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000925 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000926
Georg Brandl3354f282006-10-29 09:16:12 +0000927 buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
Georg Brandl87fa5592006-12-06 22:21:18 +0000928 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Georg Brandl3354f282006-10-29 09:16:12 +0000929 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000930 self.buf = buf
931 return buf
932
def _create_gnulong(self, name, type):
    """Build the GNU extension blocks that carry an over-long name.

       The result is the header of a pseudo member called
       "././@LongLink" with the given `type' (its size is the length
       of `name' including the terminating NUL), followed by `name'
       itself, NUL-padded up to a multiple of BLOCKSIZE.
    """
    payload = name + NUL

    # Pseudo member announcing the long name.
    header = self.__class__()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    pieces = [header.tobuf(), payload]

    # Fill the last data block up with NULs.
    leftover = len(payload) % BLOCKSIZE
    if leftover > 0:
        pieces.append(NUL * (BLOCKSIZE - leftover))
    return "".join(pieces)
955
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    kind = self.type
    return kind in REGULAR_TYPES
def isfile(self):
    """Alias for isreg()."""
    regular = self.isreg()
    return regular
def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    kind = self.type
    return kind == DIRTYPE
def issym(self):
    """Return True if the member's type is SYMTYPE."""
    kind = self.type
    return kind == SYMTYPE
def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    kind = self.type
    return kind == LNKTYPE
def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    kind = self.type
    return kind == CHRTYPE
def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    kind = self.type
    return kind == BLKTYPE
def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    kind = self.type
    return kind == FIFOTYPE
def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE."""
    kind = self.type
    return kind == GNUTYPE_SPARSE
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or
       FIFOTYPE.
    """
    kind = self.type
    return kind == CHRTYPE or kind == BLKTYPE or kind == FIFOTYPE
976# class TarInfo
977
978class TarFile(object):
979 """The TarFile Class provides an interface to tar archives.
980 """
981
982 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
983
984 dereference = False # If true, add content of linked file to the
985 # tar file, else the link.
986
987 ignore_zeros = False # If true, skips empty or invalid blocks and
988 # continues processing.
989
990 errorlevel = 0 # If 0, fatal errors only appear in debug
991 # messages (if debug >= 0). If > 0, errors
992 # are passed to the caller as exceptions.
993
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000994 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000995 # archives (no GNU extensions!)
996
997 fileobject = ExFileObject
998
999 def __init__(self, name=None, mode="r", fileobj=None):
1000 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1001 read from an existing archive, 'a' to append data to an existing
1002 file or 'w' to create a new file overwriting an existing one. `mode'
1003 defaults to 'r'.
1004 If `fileobj' is given, it is used for reading or writing data. If it
1005 can be determined, `mode' is overridden by `fileobj's mode.
1006 `fileobj' is not closed, when TarFile is closed.
1007 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001008 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001009
1010 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001011 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001012 self._mode = mode
1013 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1014
1015 if not fileobj:
1016 fileobj = file(self.name, self.mode)
1017 self._extfileobj = False
1018 else:
1019 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001020 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001021 if hasattr(fileobj, "mode"):
1022 self.mode = fileobj.mode
1023 self._extfileobj = True
1024 self.fileobj = fileobj
1025
1026 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +00001027 self.closed = False
1028 self.members = [] # list of members as TarInfo objects
1029 self._loaded = False # flag if all members have been read
1030 self.offset = 0L # current position in the archive file
1031 self.inodes = {} # dictionary caching the inodes of
1032 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001033
1034 if self._mode == "r":
1035 self.firstmember = None
1036 self.firstmember = self.next()
1037
1038 if self._mode == "a":
1039 # Move to the end of the archive,
1040 # before the first empty block.
1041 self.firstmember = None
1042 while True:
1043 try:
1044 tarinfo = self.next()
1045 except ReadError:
1046 self.fileobj.seek(0)
1047 break
1048 if tarinfo is None:
1049 self.fileobj.seek(- BLOCKSIZE, 1)
1050 break
1051
1052 if self._mode in "aw":
1053 self._loaded = True
1054
1055 #--------------------------------------------------------------------------
1056 # Below are the classmethods which act as alternate constructors to the
1057 # TarFile class. The open() method is the only one that is needed for
1058 # public use; it is the "super"-constructor and is able to select an
1059 # adequate "sub"-constructor for a particular compression using the mapping
1060 # from OPEN_METH.
1061 #
1062 # This concept allows one to subclass TarFile without losing the comfort of
1063 # the super-constructor. A sub-constructor is registered and made available
1064 # by adding it to the mapping in OPEN_METH.
1065
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending and return
       an appropriate TarFile object.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered *open() method
       accepts the file.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    # Transparent mode: try every registered *open() method in turn.
    if mode in ("r", "r:*"):
        for opener in cls.OPEN_METH.values():
            func = getattr(cls, opener)
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                continue
        raise ReadError("file could not be opened successfully")

    # "filemode:compression" selects one *open() method explicitly.
    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        func = getattr(cls, cls.OPEN_METH[comptype])
        return func(name, filemode, fileobj)

    # "filemode|compression" works on a stream of tar blocks.
    if "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        t = cls(name, filemode, stream)
        # The stream wrapper was created here, so it is closed
        # together with the TarFile.
        t._extfileobj = False
        return t

    if mode in "aw":
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001133
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly one of 'r', 'a' or 'w', otherwise
       ValueError is raised. Returns cls(name, mode, fileobj).
    """
    # `mode not in "raw"' is a substring test which the empty string
    # passes, so the length has to be checked for equality.
    if len(mode) != 1 or mode not in "raw":
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1141
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError for
       an invalid `mode', CompressionError if the gzip module is not
       usable and ReadError if the data cannot be read as gzip. A file
       that was opened here is closed again if opening the archive
       fails.
    """
    # The substring test alone would accept the empty string.
    if len(mode) != 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the tar archive inside the gzip container
    # (foo.tgz -> foo.tar, foo.tar.gz -> foo.tar). Without a name
    # (only a file object was passed) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure is passed on unchanged, but must not
        # leak the file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1178
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError for
       an invalid `mode', CompressionError if the bz2 module cannot be
       imported and ReadError if the data cannot be read as bzip2. A
       BZ2File that was opened here is closed again if opening the
       archive fails.
    """
    # The substring test alone would accept the empty string.
    if len(mode) != 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the tar archive inside the bzip2 container
    # (foo.tbz2 -> foo.tar, foo.tar.bz2 -> foo.tar). Without a name
    # (only a file object was passed) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure is passed on unchanged, but must not
        # leak the BZ2File opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1211
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001212 # All *open() methods are registered here.
1213 OPEN_METH = {
1214 "tar": "taropen", # uncompressed tar
1215 "gz": "gzopen", # gzip compressed tar
1216 "bz2": "bz2open" # bzip2 compressed tar
1217 }
1218
1219 #--------------------------------------------------------------------------
1220 # The public methods which TarFile provides:
1221
def close(self):
    """Close the TarFile. When it was opened for writing or appending,
       two zero blocks are written first and the archive is padded
       with NULs up to a multiple of RECORDSIZE. The underlying file
       object is only closed if it was not passed in from outside.
       Calling close() again has no effect.
    """
    if self.closed:
        return

    if self._mode in "aw":
        # End-of-archive marker.
        trailer = BLOCKSIZE * 2
        self.fileobj.write(NUL * trailer)
        self.offset += trailer
        # fill up the end with zero-blocks
        # (like option -b20 for tar does)
        overhang = self.offset % RECORDSIZE
        if overhang > 0:
            self.fileobj.write(NUL * (RECORDSIZE - overhang))

    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1241
def getmember(self, name):
    """Return the TarInfo object for member `name' as found by
       _getmember(). Raise KeyError if there is no such member.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001252
def getmembers(self):
    """Return the list of members (self.members). If not all members
       have been read yet, _load() is called first.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list requires _load() to run first.
    self._load()
    return self.members
1262
def getnames(self):
    """Return the names of all members as a list, in the order of
       the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001268
1269 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1270 """Create a TarInfo object for either the file `name' or the file
1271 object `fileobj' (using os.fstat on its file descriptor). You can
1272 modify some of the TarInfo's attributes before you add it using
1273 addfile(). If given, `arcname' specifies an alternative name for the
1274 file in the archive.
1275 """
1276 self._check("aw")
1277
1278 # When fileobj is given, replace name by
1279 # fileobj's real name.
1280 if fileobj is not None:
1281 name = fileobj.name
1282
1283 # Building the name of the member in the archive.
1284 # Backward slashes are converted to forward slashes,
1285 # Absolute paths are turned to relative paths.
1286 if arcname is None:
1287 arcname = name
1288 arcname = normpath(arcname)
1289 drv, arcname = os.path.splitdrive(arcname)
1290 while arcname[0:1] == "/":
1291 arcname = arcname[1:]
1292
1293 # Now, fill the TarInfo object with
1294 # information specific for the file.
1295 tarinfo = TarInfo()
1296
1297 # Use os.stat or os.lstat, depending on platform
1298 # and if symlinks shall be resolved.
1299 if fileobj is None:
1300 if hasattr(os, "lstat") and not self.dereference:
1301 statres = os.lstat(name)
1302 else:
1303 statres = os.stat(name)
1304 else:
1305 statres = os.fstat(fileobj.fileno())
1306 linkname = ""
1307
1308 stmd = statres.st_mode
1309 if stat.S_ISREG(stmd):
1310 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001311 if not self.dereference and \
1312 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001313 # Is it a hardlink to an already
1314 # archived file?
1315 type = LNKTYPE
1316 linkname = self.inodes[inode]
1317 else:
1318 # The inode is added only if its valid.
1319 # For win32 it is always 0.
1320 type = REGTYPE
1321 if inode[0]:
1322 self.inodes[inode] = arcname
1323 elif stat.S_ISDIR(stmd):
1324 type = DIRTYPE
1325 if arcname[-1:] != "/":
1326 arcname += "/"
1327 elif stat.S_ISFIFO(stmd):
1328 type = FIFOTYPE
1329 elif stat.S_ISLNK(stmd):
1330 type = SYMTYPE
1331 linkname = os.readlink(name)
1332 elif stat.S_ISCHR(stmd):
1333 type = CHRTYPE
1334 elif stat.S_ISBLK(stmd):
1335 type = BLKTYPE
1336 else:
1337 return None
1338
1339 # Fill the TarInfo object with all
1340 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001341 tarinfo.name = arcname
1342 tarinfo.mode = stmd
1343 tarinfo.uid = statres.st_uid
1344 tarinfo.gid = statres.st_gid
1345 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001346 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001347 else:
1348 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001349 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001350 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001351 tarinfo.linkname = linkname
1352 if pwd:
1353 try:
1354 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1355 except KeyError:
1356 pass
1357 if grp:
1358 try:
1359 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1360 except KeyError:
1361 pass
1362
1363 if type in (CHRTYPE, BLKTYPE):
1364 if hasattr(os, "major") and hasattr(os, "minor"):
1365 tarinfo.devmajor = os.major(statres.st_rdev)
1366 tarinfo.devminor = os.minor(statres.st_rdev)
1367 return tarinfo
1368
1369 def list(self, verbose=True):
1370 """Print a table of contents to sys.stdout. If `verbose' is False, only
1371 the names of the members are printed. If it is True, an `ls -l'-like
1372 output is produced.
1373 """
1374 self._check()
1375
1376 for tarinfo in self:
1377 if verbose:
1378 print filemode(tarinfo.mode),
1379 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1380 tarinfo.gname or tarinfo.gid),
1381 if tarinfo.ischr() or tarinfo.isblk():
1382 print "%10s" % ("%d,%d" \
1383 % (tarinfo.devmajor, tarinfo.devminor)),
1384 else:
1385 print "%10d" % tarinfo.size,
1386 print "%d-%02d-%02d %02d:%02d:%02d" \
1387 % time.localtime(tarinfo.mtime)[:6],
1388
1389 print tarinfo.name,
1390
1391 if verbose:
1392 if tarinfo.issym():
1393 print "->", tarinfo.linkname,
1394 if tarinfo.islnk():
1395 print "link to", tarinfo.linkname,
1396 print
1397
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive itself and files of an unsupported type are skipped
       with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
            and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1449
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj'
       is given, tarinfo.size bytes are read from it and written after
       the header. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a shallow copy of the TarInfo object.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        blocks, leftover = divmod(member.size, BLOCKSIZE)
        if leftover > 0:
            # Pad the last data block with NULs.
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001475
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001476 def extractall(self, path=".", members=None):
1477 """Extract all members from the archive to the current working
1478 directory and set owner, modification time and permissions on
1479 directories afterwards. `path' specifies a different directory
1480 to extract to. `members' is optional and must be a subset of the
1481 list returned by getmembers().
1482 """
1483 directories = []
1484
1485 if members is None:
1486 members = self
1487
1488 for tarinfo in members:
1489 if tarinfo.isdir():
1490 # Extract directory with a safe mode, so that
1491 # all files below can be extracted as well.
1492 try:
1493 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1494 except EnvironmentError:
1495 pass
1496 directories.append(tarinfo)
1497 else:
1498 self.extract(tarinfo, path)
1499
1500 # Reverse sort directories.
1501 directories.sort(lambda a, b: cmp(a.name, b.name))
1502 directories.reverse()
1503
1504 # Set correct owner, mtime and filemode on directories.
1505 for tarinfo in directories:
1506 path = os.path.join(path, tarinfo.name)
1507 try:
1508 self.chown(tarinfo, path)
1509 self.utime(tarinfo, path)
1510 self.chmod(tarinfo, path)
1511 except ExtractError, e:
1512 if self.errorlevel > 1:
1513 raise
1514 else:
1515 self._dbg(1, "tarfile: %s" % e)
1516
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001517 def extract(self, member, path=""):
1518 """Extract a member from the archive to the current working directory,
1519 using its full name. Its file information is extracted as accurately
1520 as possible. `member' may be a filename or a TarInfo object. You can
1521 specify a different directory using `path'.
1522 """
1523 self._check("r")
1524
1525 if isinstance(member, TarInfo):
1526 tarinfo = member
1527 else:
1528 tarinfo = self.getmember(member)
1529
Neal Norwitza4f651a2004-07-20 22:07:44 +00001530 # Prepare the link target for makelink().
1531 if tarinfo.islnk():
1532 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1533
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001534 try:
1535 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1536 except EnvironmentError, e:
1537 if self.errorlevel > 0:
1538 raise
1539 else:
1540 if e.filename is None:
1541 self._dbg(1, "tarfile: %s" % e.strerror)
1542 else:
1543 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1544 except ExtractError, e:
1545 if self.errorlevel > 1:
1546 raise
1547 else:
1548 self._dbg(1, "tarfile: %s" % e)
1549
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member'
       may be a filename or a TarInfo object. For a regular file (or a
       member of unknown type) an instance of self.fileobject is
       returned, for a link the file object of the link's target.
       For every other kind of member None is returned.
       Raises StreamError for a link if the archive is read from a
       stream of tar blocks.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(tarinfo)

    # A member of unknown type is treated as a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
1588
1589 def _extract_member(self, tarinfo, targetpath):
1590 """Extract the TarInfo object tarinfo to a physical
1591 file called targetpath.
1592 """
1593 # Fetch the TarInfo object for the given name
1594 # and build the destination pathname, replacing
1595 # forward slashes to platform specific separators.
1596 if targetpath[-1:] == "/":
1597 targetpath = targetpath[:-1]
1598 targetpath = os.path.normpath(targetpath)
1599
1600 # Create all upper directories.
1601 upperdirs = os.path.dirname(targetpath)
1602 if upperdirs and not os.path.exists(upperdirs):
1603 ti = TarInfo()
1604 ti.name = upperdirs
1605 ti.type = DIRTYPE
1606 ti.mode = 0777
1607 ti.mtime = tarinfo.mtime
1608 ti.uid = tarinfo.uid
1609 ti.gid = tarinfo.gid
1610 ti.uname = tarinfo.uname
1611 ti.gname = tarinfo.gname
1612 try:
1613 self._extract_member(ti, ti.name)
1614 except:
1615 pass
1616
1617 if tarinfo.islnk() or tarinfo.issym():
1618 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1619 else:
1620 self._dbg(1, tarinfo.name)
1621
1622 if tarinfo.isreg():
1623 self.makefile(tarinfo, targetpath)
1624 elif tarinfo.isdir():
1625 self.makedir(tarinfo, targetpath)
1626 elif tarinfo.isfifo():
1627 self.makefifo(tarinfo, targetpath)
1628 elif tarinfo.ischr() or tarinfo.isblk():
1629 self.makedev(tarinfo, targetpath)
1630 elif tarinfo.islnk() or tarinfo.issym():
1631 self.makelink(tarinfo, targetpath)
1632 elif tarinfo.type not in SUPPORTED_TYPES:
1633 self.makeunknown(tarinfo, targetpath)
1634 else:
1635 self.makefile(tarinfo, targetpath)
1636
1637 self.chown(tarinfo, targetpath)
1638 if not tarinfo.issym():
1639 self.chmod(tarinfo, targetpath)
1640 self.utime(tarinfo, targetpath)
1641
1642 #--------------------------------------------------------------------------
1643 # Below are the different file methods. They are called via
1644 # _extract_member() when extract() is called. They can be replaced in a
1645 # subclass to implement other functionality.
1646
1647 def makedir(self, tarinfo, targetpath):
1648 """Make a directory called targetpath.
1649 """
1650 try:
1651 os.mkdir(targetpath)
1652 except EnvironmentError, e:
1653 if e.errno != errno.EEXIST:
1654 raise
1655
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data
       of the member `tarinfo'. Both the member's file object
       and the target file are closed even if opening the target
       or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1664
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it
       were a regular file and report this as a debug message.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file." \
              % tarinfo.type
    self._dbg(1, message)
1672
def makefifo(self, tarinfo, targetpath):
    """Create the fifo targetpath. Raises ExtractError if the
       platform has no os.mkfifo(); `tarinfo' is not used.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001680
def makedev(self, tarinfo, targetpath):
    """Create the character or block device targetpath with the
       mode and device numbers of `tarinfo'. Raises ExtractError
       if the platform lacks os.mknod() or os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Everything that is not a block device is created
    # as a character device.
    if tarinfo.isblk():
        filetype = stat.S_IFBLK
    else:
        filetype = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | filetype, device)
1695
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link targetpath. If this fails
       with an AttributeError (platform limitation), the member the
       link refers to is extracted to targetpath instead; if that
       is not possible either, the referenced file is copied from
       the filesystem. Raises IOError if all attempts fail.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # Join the link name with the directory of the member.
            source = normpath(
                os.path.join(os.path.dirname(tarinfo.name), source))

        try:
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            source = os.path.normpath(source)
            try:
                shutil.copy2(source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001722
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo. Nothing is
       done unless the pwd module is available and the effective
       user id is 0. Raises ExtractError if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then our own gid.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Resolve the user: by name, then by id, then our own uid.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001750
def chmod(self, tarinfo, targetpath):
    """Set the permissions of targetpath to tarinfo.mode. Nothing
       is done if the platform has no os.chmod(). Raises
       ExtractError if changing the mode fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001759
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to
       tarinfo.mtime. Nothing is done if the platform has no
       os.utime() or, on win32, if the member is a directory.
       Raises ExtractError if changing the time fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return

    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001773
1774 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.
    """
    # _check() raises IOError if the TarFile is closed or its mode is
    # not one of "r" / "a".
    self._check("ra")
    # A member stored in self.firstmember is handed out once before
    # anything else is read from the file.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read: end of archive.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            # proc_member() may read further blocks, advances self.offset
            # past the member and may return a different TarInfo object.
            tarinfo = self.proc_member(tarinfo)

        except HeaderError, e:
            if self.ignore_zeros:
                # Report the bad block at debug level 2, skip one block
                # and try again with the following one.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # A bad header while self.offset is still 0 is reported as
                # a ReadError; at any other offset it is treated like the
                # end of the archive.
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    # Remember every member that has been returned.
    self.members.append(tarinfo)
    return tarinfo
1826
1827 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001828 # The following are methods that are called depending on the type of a
1829 # member. The entry point is proc_member() which is called with a TarInfo
1830 # object created from the header block from the current offset. The
1831 # proc_member() method can be overridden in a subclass to add custom
1832 # proc_*() methods. A proc_*() method MUST implement the following
1833 # operations:
1834 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1835 # if there is data that follows.
1836 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001837 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001838 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that handles its type
       and return that method's result.
    """
    member_type = tarinfo.type
    # GNU longname/longlink members carry the real name of the
    # member that follows them.
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    if member_type == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    # Everything else is handled as a builtin type.
    return self.proc_builtin(tarinfo)
1849
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type. Members of an unknown type
       are treated like regular files.

       Records the current offset as tarinfo.offset_data and returns
       tarinfo.
    """
    data_start = self.offset
    tarinfo.offset_data = data_start
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset = data_start + self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001859
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname
       or longlink member.

       The data blocks following tarinfo contain a name. The header
       after those blocks is processed via proc_member() and the
       resulting TarInfo object is returned with its name (for
       GNUTYPE_LONGNAME) or linkname (for GNUTYPE_LONGLINK) replaced
       by that name.
    """
    # Collect the data blocks in a list and join them once instead of
    # growing a string block by block.
    blocks = []
    count = tarinfo.size
    while count > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(blocks)

    # Fetch the next header and process it.
    b = self.fileobj.read(BLOCKSIZE)
    t = TarInfo.frombuf(b)
    t.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(t)

    # Patch the TarInfo object from the next header with
    # the longname information. The combined member starts where
    # the longname/longlink header started.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = buf.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = buf.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001888
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a _ringbuffer of _hole and _data sections from the sparse
       structs, stores it in tarinfo.sparse, sets tarinfo.offset_data,
       advances self.offset past the member's data blocks and replaces
       tarinfo.size with the size read from the header. Returns tarinfo.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    # The sparse structs of the first header start at byte 386.
    pos = 386
    # lastpos: end of the last section seen, as an offset in the
    # sparse file. realpos: running total of the data sizes seen so
    # far (presumably the position of the section within the stored
    # data -- confirm against the code that reads tarinfo.sparse).
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        # Each struct is 24 bytes: a 12 byte offset field followed by
        # a 12 byte numbytes field. A ValueError from nti() ends the
        # list of structs.
        try:
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            break
        # A gap between the previous section and this one is a hole.
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    # Byte 482 holds the isextended flag, bytes 483-494 the size
    # that becomes tarinfo.size below.
    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extra header holds up to 21 structs of the same layout,
        # starting at byte 0.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # Byte 504 of an extra header tells whether another one follows.
        isextended = ord(buf[504])

    # A trailing hole covers the range between the last section and
    # origsize.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # tarinfo.size is still the size from the header here; it is used
    # to skip the data blocks before being replaced by origsize.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001945
1946 #--------------------------------------------------------------------------
1947 # Little helper methods:
1948
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    full_blocks = count // BLOCKSIZE
    # A partially filled block still occupies a whole block.
    if count % BLOCKSIZE:
        full_blocks += 1
    return full_blocks * BLOCKSIZE
1957
def _getmember(self, name, tarinfo=None):
    """Search the member list backwards for a member called name and
       return it, or None if there is none. If tarinfo is given, only
       the members in front of it are searched.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        end = len(members)
    else:
        end = members.index(tarinfo)

    for candidate in reversed(members[:end]):
        if name == candidate.name:
            return candidate
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001973
def _load(self):
    """Call next() until it returns None, i.e. walk through the whole
       archive, then mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1983
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when mode is given,
       if the TarFile's own mode is not contained in mode.
    """
    if self.closed:
        owner = self.__class__.__name__
        raise IOError("%s is closed" % owner)
    if mode is None or self._mode in mode:
        return
    raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001992
def __iter__(self):
    """Return an iterator over the members: a plain list iterator once
       the archive is fully loaded, otherwise a TarIter.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2000
def _dbg(self, level, msg):
    """Write msg as one line to sys.stderr unless level is greater
       than self.debug.
    """
    if level > self.debug:
        return
    sys.stderr.write("%s\n" % (msg,))
2006# class TarFile
2007
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile, starting at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """A TarIter is its own iterator.
        """
        return self

    def next(self):
        """Return the next member. As long as the TarFile is not marked
           as _loaded the member comes from its next() method; when that
           is exhausted the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Once the
        # archive is loaded, continue from its member list by index.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2043
2044# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of size bytes that
       starts at offset.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size
    def __contains__(self, offset):
        # True if offset lies within [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset and offset < start + self.size
2053
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it stores the realpos value passed in.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2060
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section.
    """
2065
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts looking at the position of the previous hit,
       so consecutive lookups of neighbouring offsets are cheap.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0
    def find(self, offset):
        """Return the first item, starting at the last hit and wrapping
           around, for which `offset in item` is true. Return None if
           no item matches or the buffer is empty.
        """
        if not self:
            # Nothing to search; self[idx] below would raise IndexError.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2086
2087#---------------------------------------------
2088# zipfile compatible TarFile class
2089#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Pick the TarFile constructor that matches the compression
        # constant.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names a ZipInfo has.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        return [member.name for member in self.infolist()]
    def infolist(self):
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        # There is no integrity test; always returns None.
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted but not used.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Copy the ZipInfo style attributes to the names addfile() is
        # given via the TarInfo interface.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
2137
2138#--------------------
2139# exported functions
2140#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) with this
       module's open() without a TarError, else return False.
    """
    try:
        archive = open(name)
        archive.close()
    except TarError:
        return False
    return True
2151
# Make tarfile.open() an alias for TarFile.open(). This rebinds the
# module-level name `open`, so the open() call inside is_tarfile() above
# resolves to TarFile.open when it runs.
open = TarFile.open