blob: 1b8f1408a79c8aa772bb3008ae1a271328cefb24 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl2527f7f2006-10-29 09:16:15 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
Jack Jansencfc49022003-03-07 13:37:32 +000054if sys.platform == 'mac':
55 # This module needs work for MacOS9, especially in the area of pathname
56 # handling. In many places it is assumed a simple substitution of / by the
57 # local os.path.sep is good enough to convert pathnames, but this does not
58 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
59 raise ImportError, "tarfile does not work for platform==mac"
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
72NUL = "\0" # the null character
73BLOCKSIZE = 512 # length of processing blocks
74RECORDSIZE = BLOCKSIZE * 20 # length of records
75MAGIC = "ustar" # magic tar string
76VERSION = "00" # version number
77
78LENGTH_NAME = 100 # maximum length of a filename
79LENGTH_LINK = 100 # maximum length of a linkname
80LENGTH_PREFIX = 155 # maximum length of the prefix field
81MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
82
83REGTYPE = "0" # regular file
84AREGTYPE = "\0" # regular file
85LNKTYPE = "1" # link (inside tarfile)
86SYMTYPE = "2" # symbolic link
87CHRTYPE = "3" # character special device
88BLKTYPE = "4" # block special device
89DIRTYPE = "5" # directory
90FIFOTYPE = "6" # fifo special device
91CONTTYPE = "7" # contiguous file
92
93GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
94GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
95GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
96
97#---------------------------------------------------------
98# tarfile constants
99#---------------------------------------------------------
100SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
101 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
102 CONTTYPE, CHRTYPE, BLKTYPE,
103 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
104 GNUTYPE_SPARSE)
105
106REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
107 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
108
109#---------------------------------------------------------
110# Bits used in the mode field, values in octal.
111#---------------------------------------------------------
112S_IFLNK = 0120000 # symbolic link
113S_IFREG = 0100000 # regular file
114S_IFBLK = 0060000 # block device
115S_IFDIR = 0040000 # directory
116S_IFCHR = 0020000 # character device
117S_IFIFO = 0010000 # fifo
118
119TSUID = 04000 # set UID on execution
120TSGID = 02000 # set GID on execution
121TSVTX = 01000 # reserved
122
123TUREAD = 0400 # read by owner
124TUWRITE = 0200 # write by owner
125TUEXEC = 0100 # execute/search by owner
126TGREAD = 0040 # read by group
127TGWRITE = 0020 # write by group
128TGEXEC = 0010 # execute/search by group
129TOREAD = 0004 # read by other
130TOWRITE = 0002 # write by other
131TOEXEC = 0001 # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Return s as a fixed-width header field of `length` characters.

    Strings longer than `length` are cut off, shorter ones are filled
    up on the right with NUL characters.
    """
    field = s[:length]
    # A negative repeat count yields "", so over-long input gets no padding.
    padding = NUL * (length - len(s))
    return field + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nti(s):
    """Decode a tar header number field into a python number.

    Two encodings exist (see itn()): a string of octal digits padded
    with NULs/spaces, or - marked by a leading 0x80 byte - a big-endian
    base-256 number in the remaining bytes.
    """
    if s[0] == chr(0x80):
        value = long(0)
        for char in s[1:]:
            value = (value << 8) + ord(char)
        return value

    # An all-padding field counts as zero.
    digits = s.rstrip(NUL + " ") or "0"
    return int(digits, 8)
155
def itn(n, digits=8, posix=False):
    """Encode a python number as a tar header number field.

    POSIX 1003.1-1988 stores numbers as digits-1 octal digits followed
    by a NUL byte, which covers 0 .. 8**(digits-1)-1. Values outside
    that range are stored the GNU tar way: a 0x80 marker byte followed
    by a big-endian representation in digits-1 bytes. With posix set,
    such values raise ValueError instead.
    """
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Peel off the low byte first, so each new byte goes in front.
    octets = []
    remaining = width
    while remaining > 0:
        octets.insert(0, chr(n & 0xFF))
        n >>= 8
        remaining -= 1
    return chr(0x80) + "".join(octets)
182
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All characters of the 512 byte header are summed up, except for the
    8 byte chksum field (offset 148) which counts as if it was filled
    with spaces. According to the GNU tar sources, some tars (Sun and
    NeXT) sum up signed chars, which gives a different result as soon
    as a byte with the high bit set is present - hence two values.
    """
    header = buf[:512]
    # "8x" skips the chksum field; 256 is what its eight spaces add up to.
    as_unsigned = struct.unpack("148B8x356B", header)
    as_signed = struct.unpack("148b8x356b", header)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000195
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None everything up to EOF is copied. Raises IOError
    if src runs dry before length bytes were transferred.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    while full_chunks > 0:
        data = src.read(chunk_size)
        if len(data) < chunk_size:
            raise IOError("end of file reached")
        dst.write(data)
        full_chunks -= 1

    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
220
# One inner tuple per output character of filemode(); within each tuple
# the first (bits, char) pair whose bits are all set in the mode wins.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Render a file's mode as a string of the form -rwxrwxrwx.

    Each group of filemode_table contributes exactly one character,
    "-" if none of its entries applies. Used by TarFile.list()
    """
    chars = []
    for group in filemode_table:
        symbol = "-"
        for mask, flag in group:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000262
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/",
           which is the separator used inside tar archives.
        """
        return os.path.normpath(path).replace(os.sep, "/")
267
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
283
284#---------------------------
285# internal stream interface
286#---------------------------
287class _LowLevelFile:
288 """Low-level file object. Supports reading and writing.
289 It is used instead of a regular file object for streaming
290 access.
291 """
292
293 def __init__(self, name, mode):
294 mode = {
295 "r": os.O_RDONLY,
296 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
297 }[mode]
298 if hasattr(os, "O_BINARY"):
299 mode |= os.O_BINARY
300 self.fd = os.open(name, mode)
301
302 def close(self):
303 os.close(self.fd)
304
305 def read(self, size):
306 return os.read(self.fd, size)
307
308 def write(self, s):
309 os.write(self.fd, s)
310
311class _Stream:
312 """Class that serves as an adapter between TarFile and
313 a stream-like object. The stream-like object only
314 needs to have a read() or write() method and is accessed
315 blockwise. Use of gzip or bzip2 compression is possible.
316 A stream-like object could be for example: sys.stdin,
317 sys.stdout, a socket, a tape device etc.
318
319 _Stream is intended to be used only internally.
320 """
321
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000322 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000323 """Construct a _Stream object.
324 """
325 self._extfileobj = True
326 if fileobj is None:
327 fileobj = _LowLevelFile(name, mode)
328 self._extfileobj = False
329
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000330 if comptype == '*':
331 # Enable transparent compression detection for the
332 # stream interface
333 fileobj = _StreamProxy(fileobj)
334 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000335
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000336 self.name = name or ""
337 self.mode = mode
338 self.comptype = comptype
339 self.fileobj = fileobj
340 self.bufsize = bufsize
341 self.buf = ""
342 self.pos = 0L
343 self.closed = False
344
345 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000346 try:
347 import zlib
348 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000349 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000350 self.zlib = zlib
351 self.crc = zlib.crc32("")
352 if mode == "r":
353 self._init_read_gz()
354 else:
355 self._init_write_gz()
356
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000357 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358 try:
359 import bz2
360 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000361 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000362 if mode == "r":
363 self.dbuf = ""
364 self.cmp = bz2.BZ2Decompressor()
365 else:
366 self.cmp = bz2.BZ2Compressor()
367
368 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000369 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000370 self.close()
371
372 def _init_write_gz(self):
373 """Initialize for writing with gzip compression.
374 """
375 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
376 -self.zlib.MAX_WBITS,
377 self.zlib.DEF_MEM_LEVEL,
378 0)
379 timestamp = struct.pack("<L", long(time.time()))
380 self.__write("\037\213\010\010%s\002\377" % timestamp)
381 if self.name.endswith(".gz"):
382 self.name = self.name[:-3]
383 self.__write(self.name + NUL)
384
385 def write(self, s):
386 """Write string s to the stream.
387 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000388 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000389 self.crc = self.zlib.crc32(s, self.crc)
390 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000391 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000392 s = self.cmp.compress(s)
393 self.__write(s)
394
395 def __write(self, s):
396 """Write string s to the stream if a whole new block
397 is ready to be written.
398 """
399 self.buf += s
400 while len(self.buf) > self.bufsize:
401 self.fileobj.write(self.buf[:self.bufsize])
402 self.buf = self.buf[self.bufsize:]
403
404 def close(self):
405 """Close the _Stream object. No operation should be
406 done on it afterwards.
407 """
408 if self.closed:
409 return
410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000412 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000413
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000415 self.fileobj.write(self.buf)
416 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000417 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000418 # The native zlib crc is an unsigned 32-bit integer, but
419 # the Python wrapper implicitly casts that to a signed C
420 # long. So, on a 32-bit box self.crc may "look negative",
421 # while the same crc on a 64-bit box may "look positive".
422 # To avoid irksome warnings from the `struct` module, force
423 # it to look positive on all boxes.
424 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000425 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 if not self._extfileobj:
428 self.fileobj.close()
429
430 self.closed = True
431
432 def _init_read_gz(self):
433 """Initialize for reading a gzip compressed fileobj.
434 """
435 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
436 self.dbuf = ""
437
438 # taken from gzip.GzipFile with some alterations
439 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000440 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000441 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000442 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000443
444 flag = ord(self.__read(1))
445 self.__read(6)
446
447 if flag & 4:
448 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
449 self.read(xlen)
450 if flag & 8:
451 while True:
452 s = self.__read(1)
453 if not s or s == NUL:
454 break
455 if flag & 16:
456 while True:
457 s = self.__read(1)
458 if not s or s == NUL:
459 break
460 if flag & 2:
461 self.__read(2)
462
463 def tell(self):
464 """Return the stream's file pointer position.
465 """
466 return self.pos
467
468 def seek(self, pos=0):
469 """Set the stream's file pointer to pos. Negative seeking
470 is forbidden.
471 """
472 if pos - self.pos >= 0:
473 blocks, remainder = divmod(pos - self.pos, self.bufsize)
474 for i in xrange(blocks):
475 self.read(self.bufsize)
476 self.read(remainder)
477 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000478 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479 return self.pos
480
481 def read(self, size=None):
482 """Return the next size number of bytes from the stream.
483 If size is not defined, return all bytes of the stream
484 up to EOF.
485 """
486 if size is None:
487 t = []
488 while True:
489 buf = self._read(self.bufsize)
490 if not buf:
491 break
492 t.append(buf)
493 buf = "".join(t)
494 else:
495 buf = self._read(size)
496 self.pos += len(buf)
497 return buf
498
499 def _read(self, size):
500 """Return size bytes from the stream.
501 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000502 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503 return self.__read(size)
504
505 c = len(self.dbuf)
506 t = [self.dbuf]
507 while c < size:
508 buf = self.__read(self.bufsize)
509 if not buf:
510 break
511 buf = self.cmp.decompress(buf)
512 t.append(buf)
513 c += len(buf)
514 t = "".join(t)
515 self.dbuf = t[size:]
516 return t[:size]
517
518 def __read(self, size):
519 """Return size bytes from stream. If internal buffer is empty,
520 read another block from the stream.
521 """
522 c = len(self.buf)
523 t = [self.buf]
524 while c < size:
525 buf = self.fileobj.read(self.bufsize)
526 if not buf:
527 break
528 t.append(buf)
529 c += len(buf)
530 t = "".join(t)
531 self.buf = t[size:]
532 return t[:size]
533# class _Stream
534
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000535class _StreamProxy(object):
536 """Small proxy class that enables transparent compression
537 detection for the Stream interface (mode 'r|*').
538 """
539
540 def __init__(self, fileobj):
541 self.fileobj = fileobj
542 self.buf = self.fileobj.read(BLOCKSIZE)
543
544 def read(self, size):
545 self.read = self.fileobj.read
546 return self.buf
547
548 def getcomptype(self):
549 if self.buf.startswith("\037\213\010"):
550 return "gz"
551 if self.buf.startswith("BZh91"):
552 return "bz2"
553 return "tar"
554
555 def close(self):
556 self.fileobj.close()
557# class StreamProxy
558
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000559class _BZ2Proxy(object):
560 """Small proxy class that enables external file object
561 support for "r:bz2" and "w:bz2" modes. This is actually
562 a workaround for a limitation in bz2 module's BZ2File
563 class which (unlike gzip.GzipFile) has no support for
564 a file object argument.
565 """
566
567 blocksize = 16 * 1024
568
569 def __init__(self, fileobj, mode):
570 self.fileobj = fileobj
571 self.mode = mode
572 self.init()
573
574 def init(self):
575 import bz2
576 self.pos = 0
577 if self.mode == "r":
578 self.bz2obj = bz2.BZ2Decompressor()
579 self.fileobj.seek(0)
580 self.buf = ""
581 else:
582 self.bz2obj = bz2.BZ2Compressor()
583
584 def read(self, size):
585 b = [self.buf]
586 x = len(self.buf)
587 while x < size:
588 try:
589 raw = self.fileobj.read(self.blocksize)
590 data = self.bz2obj.decompress(raw)
591 b.append(data)
592 except EOFError:
593 break
594 x += len(data)
595 self.buf = "".join(b)
596
597 buf = self.buf[:size]
598 self.buf = self.buf[size:]
599 self.pos += len(buf)
600 return buf
601
602 def seek(self, pos):
603 if pos < self.pos:
604 self.init()
605 self.read(pos - self.pos)
606
607 def tell(self):
608 return self.pos
609
610 def write(self, data):
611 self.pos += len(data)
612 raw = self.bz2obj.compress(data)
613 self.fileobj.write(raw)
614
615 def close(self):
616 if self.mode == "w":
617 raw = self.bz2obj.flush()
618 self.fileobj.write(raw)
Georg Brandle8953182006-05-27 14:02:03 +0000619 self.fileobj.close()
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000620# class _BZ2Proxy
621
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000622#------------------------
623# Extraction file object
624#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member tarinfo of the archive
           tarfile. The data is read from tarfile.fileobj.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the data in fileobj
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # data read ahead by readline()
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with at most size characters (plus the line
           end if it directly follows them). If size is negative, read
           a whole line. Trailing "\\r" characters are removed from a
           complete line. readline() and read() must not be mixed up (!).
        """
        unlimited = size < 0

        # Read ahead in small chunks until a line end shows up, the
        # size limit is reached or the data is exhausted.
        nl = self.linebuffer.find("\n")
        while nl < 0 and (unlimited or len(self.linebuffer) < size):
            if unlimited:
                want = 100
            else:
                want = min(size - len(self.linebuffer), 100)
            buf = self.read(want)
            if not buf:
                break
            self.linebuffer += buf
            nl = self.linebuffer.find("\n")

        if nl < 0 or (not unlimited and nl >= size):
            # No line end within reach: return the permitted part
            # unchanged and keep the rest - no byte may be dropped and
            # no "\n" may be made up here.
            if unlimited:
                cut = len(self.linebuffer)
            else:
                cut = size
            s = self.linebuffer[:cut]
            self.linebuffer = self.linebuffer[cut:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files. If size is None or
           negative, everything up to the end of the member is read.
           Raises ValueError if the file object is closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None or size < 0:
            # A negative size must not move the position backwards or
            # be handed to fileobj, which would read past the member.
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files. If size is None or
           negative, everything up to the end of the member is read.
           Raises ValueError if the file object is closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # data section: translate the position to the place where
            # the data is stored in the archive
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # any other section is not stored, it reads as NULs
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. whence is 0 (absolute),
           1 (relative to the current position) or 2 (relative to the
           end). The result is clamped to the range 0..size.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        elif whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        elif whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
781
782#------------------
783# Exported Classes
784#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name (dirnames must end with '/')
        self.mode = (TUREAD | TUWRITE | TGREAD | TGWRITE |
                     TOREAD | TOWRITE)  # file permissions (rw-rw-rw-)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises ValueError if buf has the wrong length ("truncated
           header"), consists of NULs only ("empty header") or if its
           checksum does not match ("invalid header").
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = buf[0:100].rstrip(NUL)
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = buf[157:257].rstrip(NUL)
        # buf[257:265] holds magic and version, which are not evaluated.
        tarinfo.uname = buf[265:297].rstrip(NUL)
        tarinfo.gname = buf[297:329].rstrip(NUL)
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        prefix = buf[345:500].rstrip(NUL)

        # The prefix field is only joined with the name for members
        # that are not GNU sparse files.
        if prefix and not tarinfo.issparse():
            tarinfo.name = prefix + "/" + tarinfo.name

        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header as a string of 512 byte blocks.

           If posix is true, names and numbers that do not fit into
           their header fields raise ValueError. Otherwise GNU
           extensions are used: over-long names and linknames are put
           into extra longname/longlink blocks in front of the header.
           The result is also stored in self.buf.
        """
        buf = ""
        membertype = self.type
        prefix = ""

        if self.name.endswith("/"):
            membertype = DIRTYPE

        if membertype in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            # Prevent "././@LongLink" from being normalized.
            name = self.name
        else:
            name = normpath(self.name)

        if membertype == DIRTYPE:
            # directories should end with '/'
            name += "/"

        # The linkname is deliberately stored exactly as given.

        if posix:
            if self.size > MAXSIZE_MEMBER:
                raise ValueError("file is too large (>= 8 GB)")

            if len(self.linkname) > LENGTH_LINK:
                raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))

            if len(name) > LENGTH_NAME:
                # Split the name at a "/" into prefix and name field.
                prefix = name[:LENGTH_PREFIX + 1]
                while prefix and prefix[-1] != "/":
                    prefix = prefix[:-1]

                name = name[len(prefix):]
                prefix = prefix[:-1]

                if not prefix or len(name) > LENGTH_NAME:
                    raise ValueError("name is too long")

        else:
            if len(self.linkname) > LENGTH_LINK:
                buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)

            if len(name) > LENGTH_NAME:
                buf += self._create_gnulong(name, GNUTYPE_LONGNAME)

        parts = [
            stn(name, 100),
            itn(stat.S_IMODE(self.mode), 8, posix),
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            8 * " ",    # checksum field, must be exactly 8 spaces here
            membertype,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(prefix, 155)
        ]

        # The fields add up to 500 bytes, pack() pads the block to 512.
        buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Put the checksum at offset 148 of the last block: six octal
        # digits and a NUL, the eighth byte of the field stays a space.
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        self.buf = buf
        return buf

    def _create_gnulong(self, name, type):
        """Create a GNU longname/longlink header from name.
           It consists of an extended tar header, with the length
           of the longname as size, followed by data blocks,
           which contain the longname as a null terminated string.
        """
        name += NUL

        tarinfo = self.__class__()
        tarinfo.name = "././@LongLink"
        tarinfo.type = type
        tarinfo.mode = 0
        tarinfo.size = len(name)

        # create extended header
        buf = tarinfo.tobuf()
        # create name blocks
        buf += name
        blocks, remainder = divmod(len(name), BLOCKSIZE)
        if remainder > 0:
            buf += (BLOCKSIZE - remainder) * NUL
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
969
970class TarFile(object):
971 """The TarFile Class provides an interface to tar archives.
972 """
973
974 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
975
976 dereference = False # If true, add content of linked file to the
977 # tar file, else the link.
978
979 ignore_zeros = False # If true, skips empty or invalid blocks and
980 # continues processing.
981
982 errorlevel = 0 # If 0, fatal errors only appear in debug
983 # messages (if debug >= 0). If > 0, errors
984 # are passed to the caller as exceptions.
985
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000986 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000987 # archives (no GNU extensions!)
988
989 fileobject = ExFileObject
990
991 def __init__(self, name=None, mode="r", fileobj=None):
992 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
993 read from an existing archive, 'a' to append data to an existing
994 file or 'w' to create a new file overwriting an existing one. `mode'
995 defaults to 'r'.
996 If `fileobj' is given, it is used for reading or writing data. If it
997 can be determined, `mode' is overridden by `fileobj's mode.
998 `fileobj' is not closed, when TarFile is closed.
999 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001000 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001001
1002 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001003 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001004 self._mode = mode
1005 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1006
1007 if not fileobj:
1008 fileobj = file(self.name, self.mode)
1009 self._extfileobj = False
1010 else:
1011 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001012 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001013 if hasattr(fileobj, "mode"):
1014 self.mode = fileobj.mode
1015 self._extfileobj = True
1016 self.fileobj = fileobj
1017
1018 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +00001019 self.closed = False
1020 self.members = [] # list of members as TarInfo objects
1021 self._loaded = False # flag if all members have been read
1022 self.offset = 0L # current position in the archive file
1023 self.inodes = {} # dictionary caching the inodes of
1024 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001025
1026 if self._mode == "r":
1027 self.firstmember = None
1028 self.firstmember = self.next()
1029
1030 if self._mode == "a":
1031 # Move to the end of the archive,
1032 # before the first empty block.
1033 self.firstmember = None
1034 while True:
1035 try:
1036 tarinfo = self.next()
1037 except ReadError:
1038 self.fileobj.seek(0)
1039 break
1040 if tarinfo is None:
1041 self.fileobj.seek(- BLOCKSIZE, 1)
1042 break
1043
1044 if self._mode in "aw":
1045 self._loaded = True
1046
1047 #--------------------------------------------------------------------------
1048 # Below are the classmethods which act as alternate constructors to the
1049 # TarFile class. The open() method is the only one that is needed for
1050 # public use; it is the "super"-constructor and is able to select an
1051 # adequate "sub"-constructor for a particular compression using the mapping
1052 # from OPEN_METH.
1053 #
1054 # This concept allows one to subclass TarFile without losing the comfort of
1055 # the super-constructor. A sub-constructor is registered and made available
1056 # by adding it to the mapping in OPEN_METH.
1057
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or
       if `mode' cannot be interpreted, CompressionError for an
       unknown compression type and ReadError if no registered
       method is able to read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            # A failed attempt may already have consumed data from
            # a caller-supplied file object; remember where we
            # started so that the next attempt sees the same bytes.
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    # Exact comparison: `mode in "aw"' would also accept "".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001125
Guido van Rossum75b64e62005-01-16 00:16:11 +00001126 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001127 def taropen(cls, name, mode="r", fileobj=None):
1128 """Open uncompressed tar archive name for reading or writing.
1129 """
1130 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001131 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001132 return cls(name, mode, fileobj)
1133
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError
       for a mode other than 'r' or 'w', CompressionError if the
       gzip module is unavailable and ReadError if the data is not
       gzip compressed. A file opened here is closed again if the
       archive cannot be opened.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the contained tar archive: "x.tgz" and
    # "x.tar.gz" both become "x.tar". Without a name (fileobj
    # only) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    # When writing, the tar name is what goes to GzipFile as filename.
    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure: release our own file, then pass it on.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1170
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError
       for a mode other than 'r' or 'w', CompressionError if the
       bz2 module is unavailable and ReadError if the data is not
       bzip2 compressed. A BZ2File opened here is closed again if
       the archive cannot be opened.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the contained tar archive: "x.tbz2" and
    # "x.tar.bz2" both become "x.tar". Without a name (fileobj
    # only) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure: release our own file, then pass it on.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1203
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001204 # All *open() methods are registered here.
1205 OPEN_METH = {
1206 "tar": "taropen", # uncompressed tar
1207 "gz": "gzopen", # gzip compressed tar
1208 "bz2": "bz2open" # bzip2 compressed tar
1209 }
1210
1211 #--------------------------------------------------------------------------
1212 # The public methods which TarFile provides:
1213
def close(self):
    """Close the TarFile. When the archive was opened for writing or
       appending, two zero blocks are written first and the archive
       is then padded with zeros to a multiple of RECORDSIZE.
       Calling close() on an already closed TarFile does nothing.
    """
    if not self.closed:
        if self._mode in "aw":
            # End-of-archive marker: two empty blocks.
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            overhang = self.offset % RECORDSIZE
            if overhang > 0:
                self.fileobj.write(NUL * (RECORDSIZE - overhang))

        # A file object handed in by the caller stays open.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1233
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised if there is no such member. If a member
       occurs more than once in the archive, its last occurrence is
       assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001244
def getmembers(self):
    """Return a list of TarInfo objects, one per archive member, in
       the order in which the members appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The complete list is only known after the
    # whole archive has been scanned once.
    self._load()
    return self.members
1254
def getnames(self):
    """Return a list with the name of every archive member, in the
       same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001260
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that is not handled here
       (none of regular file, directory, fifo, symbolic link, character
       or block device).
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    drv, arcname = os.path.splitdrive(arcname)
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        # self.inodes maps (st_ino, st_dev) to the archive name under
        # which that inode was first seen.
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and \
                statres.st_nlink > 1 and inode in self.inodes:
            # Is it a hardlink to an already
            # archived file?
            type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if it's valid.
            # For win32 it is always 0.
            type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        type = DIRTYPE
        # Directory names carry a trailing slash in the archive.
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        type = BLKTYPE
    else:
        # Any other kind of file is not represented.
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files get a size; everything else is recorded as 0.
    if stat.S_ISREG(stmd):
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0L
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = type
    tarinfo.linkname = linkname
    # Symbolic user/group names are filled in only when the pwd/grp
    # modules are usable and know the id; otherwise they are left alone.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1360
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # Each print ends with a comma so that all columns of one
            # member stay on the same output line.
            print filemode(tarinfo.mode),
            # Fall back to the numeric ids when no names are recorded.
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Devices show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the line for this member.
        print
1389
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of an unsupported type are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
            and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the handle even if writing the member failed.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1441
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive. If
       `fileobj' is given, tarinfo.size bytes are read from it and
       stored as the member's data. TarInfo objects can be created
       with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, so the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            # Pad the last, partially filled block with NULs.
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001467
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001468 def extractall(self, path=".", members=None):
1469 """Extract all members from the archive to the current working
1470 directory and set owner, modification time and permissions on
1471 directories afterwards. `path' specifies a different directory
1472 to extract to. `members' is optional and must be a subset of the
1473 list returned by getmembers().
1474 """
1475 directories = []
1476
1477 if members is None:
1478 members = self
1479
1480 for tarinfo in members:
1481 if tarinfo.isdir():
1482 # Extract directory with a safe mode, so that
1483 # all files below can be extracted as well.
1484 try:
1485 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1486 except EnvironmentError:
1487 pass
1488 directories.append(tarinfo)
1489 else:
1490 self.extract(tarinfo, path)
1491
1492 # Reverse sort directories.
1493 directories.sort(lambda a, b: cmp(a.name, b.name))
1494 directories.reverse()
1495
1496 # Set correct owner, mtime and filemode on directories.
1497 for tarinfo in directories:
1498 path = os.path.join(path, tarinfo.name)
1499 try:
1500 self.chown(tarinfo, path)
1501 self.utime(tarinfo, path)
1502 self.chmod(tarinfo, path)
1503 except ExtractError, e:
1504 if self.errorlevel > 1:
1505 raise
1506 else:
1507 self._dbg(1, "tarfile: %s" % e)
1508
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001509 def extract(self, member, path=""):
1510 """Extract a member from the archive to the current working directory,
1511 using its full name. Its file information is extracted as accurately
1512 as possible. `member' may be a filename or a TarInfo object. You can
1513 specify a different directory using `path'.
1514 """
1515 self._check("r")
1516
1517 if isinstance(member, TarInfo):
1518 tarinfo = member
1519 else:
1520 tarinfo = self.getmember(member)
1521
Neal Norwitza4f651a2004-07-20 22:07:44 +00001522 # Prepare the link target for makelink().
1523 if tarinfo.islnk():
1524 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1525
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001526 try:
1527 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1528 except EnvironmentError, e:
1529 if self.errorlevel > 0:
1530 raise
1531 else:
1532 if e.filename is None:
1533 self._dbg(1, "tarfile: %s" % e.strerror)
1534 else:
1535 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1536 except ExtractError, e:
1537 if self.errorlevel > 1:
1538 raise
1539 else:
1540 self._dbg(1, "tarfile: %s" % e)
1541
def extractfile(self, member):
    """Return a file object for an archive member. `member' may be a
       filename or a TarInfo object. For a regular file (or a member
       of unknown type) a file-like object is returned. For a link,
       the file-like object of the link's target is returned. For
       anything else the result is None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type which are
    # treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Members without data (directory, chrdev, blkdev, etc.)
    # have no file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Following a (sym)link means looking up another member, which
    # is not possible on a non-seekable stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1580
1581 def _extract_member(self, tarinfo, targetpath):
1582 """Extract the TarInfo object tarinfo to a physical
1583 file called targetpath.
1584 """
1585 # Fetch the TarInfo object for the given name
1586 # and build the destination pathname, replacing
1587 # forward slashes to platform specific separators.
1588 if targetpath[-1:] == "/":
1589 targetpath = targetpath[:-1]
1590 targetpath = os.path.normpath(targetpath)
1591
1592 # Create all upper directories.
1593 upperdirs = os.path.dirname(targetpath)
1594 if upperdirs and not os.path.exists(upperdirs):
1595 ti = TarInfo()
1596 ti.name = upperdirs
1597 ti.type = DIRTYPE
1598 ti.mode = 0777
1599 ti.mtime = tarinfo.mtime
1600 ti.uid = tarinfo.uid
1601 ti.gid = tarinfo.gid
1602 ti.uname = tarinfo.uname
1603 ti.gname = tarinfo.gname
1604 try:
1605 self._extract_member(ti, ti.name)
1606 except:
1607 pass
1608
1609 if tarinfo.islnk() or tarinfo.issym():
1610 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1611 else:
1612 self._dbg(1, tarinfo.name)
1613
1614 if tarinfo.isreg():
1615 self.makefile(tarinfo, targetpath)
1616 elif tarinfo.isdir():
1617 self.makedir(tarinfo, targetpath)
1618 elif tarinfo.isfifo():
1619 self.makefifo(tarinfo, targetpath)
1620 elif tarinfo.ischr() or tarinfo.isblk():
1621 self.makedev(tarinfo, targetpath)
1622 elif tarinfo.islnk() or tarinfo.issym():
1623 self.makelink(tarinfo, targetpath)
1624 elif tarinfo.type not in SUPPORTED_TYPES:
1625 self.makeunknown(tarinfo, targetpath)
1626 else:
1627 self.makefile(tarinfo, targetpath)
1628
1629 self.chown(tarinfo, targetpath)
1630 if not tarinfo.issym():
1631 self.chmod(tarinfo, targetpath)
1632 self.utime(tarinfo, targetpath)
1633
1634 #--------------------------------------------------------------------------
1635 # Below are the different file methods. They are called via
1636 # _extract_member() when extract() is called. They can be replaced in a
1637 # subclass to implement other functionality.
1638
1639 def makedir(self, tarinfo, targetpath):
1640 """Make a directory called targetpath.
1641 """
1642 try:
1643 os.mkdir(targetpath)
1644 except EnvironmentError, e:
1645 if e.errno != errno.EEXIST:
1646 raise
1647
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data that
       extractfile() provides for tarinfo. Both file objects are
       closed again, even if opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1656
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it
       were a regular file and report that in a debug message.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1664
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. ExtractError is raised
       if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001672
def makedev(self, tarinfo, targetpath):
    """Create a character or block device called targetpath.
       ExtractError is raised if the os module lacks mknod()
       or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Everything that is not a block device is
    # created as a character device.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1687
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       Raises IOError if neither the link nor a copy could be created.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Reached when an attribute lookup in the try block fails,
        # e.g. the os module has no symlink()/link() on this platform.
        if tarinfo.issym():
            # A symlink target is relative to the directory of the
            # link itself; turn it into a member name.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            # First choice: extract the referenced member a second
            # time under the link's name.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            # Second choice: copy the referenced file from the
            # file system.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001714
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Raises ExtractError if changing the
       owner fails.
    """
    if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
        # We have to be root to do so.
        # Group: look up by name first, then by the numeric id,
        # and finally fall back to the group of this process.
        try:
            g = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            try:
                g = grp.getgrgid(tarinfo.gid)[2]
            except KeyError:
                g = os.getgid()
        # User: same order of fallbacks as for the group.
        try:
            u = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            try:
                u = pwd.getpwuid(tarinfo.uid)[2]
            except KeyError:
                u = os.getuid()
        try:
            # Use lchown() for a symbolic link where available.
            if tarinfo.issym() and hasattr(os, "lchown"):
                os.lchown(targetpath, u, g)
            else:
                # os.chown() is skipped on the os2emx platform.
                if sys.platform != "os2emx":
                    os.chown(targetpath, u, g)
        except EnvironmentError, e:
            raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001742
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Nothing is done if the os module has no chmod(). Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001751
def utime(self, tarinfo, targetpath):
    """Apply the modification time stored in tarinfo to targetpath
       (used for both access and modification time). Nothing is done
       if the os module has no utime(), or for directories on win32.
       Raises ExtractError if the time cannot be changed.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    unsupported = not hasattr(os, 'utime') or \
                  (sys.platform == "win32" and tarinfo.isdir())
    if unsupported:
        return

    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001765
1766 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError if the block at offset 0 is not a valid header
       and ignore_zeros is false.
    """
    self._check("ra")
    # A member that was read ahead and stored in firstmember
    # is handed out first.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the bad block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # An invalid block at the very start is reported as an
                # error; anywhere else it is treated as the end of the
                # archive.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1820
1821 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001822 # The following are methods that are called depending on the type of a
1823 # member. The entry point is proc_member() which is called with a TarInfo
1824 # object created from the header block from the current offset. The
1825 # proc_member() method can be overridden in a subclass to add custom
1826 # proc_*() methods. A proc_*() method MUST implement the following
1827 # operations:
1828 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1829 # if there is data that follows.
1830 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001831 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001832 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that handles its type
       and return that method's result.
    """
    kind = tarinfo.type
    if kind == GNUTYPE_LONGNAME or kind == GNUTYPE_LONGLINK:
        handler = self.proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        # Everything else, including unknown types.
        handler = self.proc_builtin
    return handler(tarinfo)
1843
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type. Members of an unknown type
       end up here too and are treated like regular files.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Step over the data blocks that follow the header.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001853
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member: read the blocks that
       hold the long name, process the header that follows them and
       return that member with the long name patched in.
    """
    # Collect the data blocks that carry the name.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longname = "".join(chunks).rstrip(NUL)

    # Fetch the next header and process it.
    header = self.fileobj.read(BLOCKSIZE)
    member = TarInfo.frombuf(header)
    member.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(member)

    # The resulting member starts where the longname header started
    # and receives the long name or link name.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longname
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longname

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001882
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       The sparse structs of the header (and of any extended header
       blocks that follow it) are turned into a sequence of _data and
       _hole sections which is stored in tarinfo.sparse. Afterwards
       tarinfo.offset_data points at the data blocks, self.offset at
       the block behind them, and tarinfo.size holds the original file
       size taken from the header.

       Raises ValueError if an extended header block is truncated.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()

    def add_structs(block, pos, count, lastpos, realpos):
        # Append the sections described by up to `count` 24-byte sparse
        # structs (12 bytes offset, 12 bytes size) found in block from
        # pos on, and return the updated (lastpos, realpos).
        for i in range(count):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        return lastpos, realpos

    # There are 4 possible sparse structs in the
    # first header.
    lastpos, realpos = add_structs(buf, 386, 4, 0, 0)

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        if len(buf) < BLOCKSIZE:
            # A short read would make buf[504] below fail with an
            # IndexError; report the broken archive as ValueError,
            # the error next() handles around proc_member().
            raise ValueError("truncated sparse header")
        self.offset += BLOCKSIZE
        # An extended header block holds up to 21 sparse structs.
        lastpos, realpos = add_structs(buf, 0, 21, lastpos, realpos)
        isextended = ord(buf[504])

    # Whatever lies between the last section and the end of the
    # original file is one more hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001939
1940 #--------------------------------------------------------------------------
1941 # Little helper methods:
1942
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    # Ceiling division: a partially filled block counts as a whole one.
    blocks = (count + BLOCKSIZE - 1) // BLOCKSIZE
    return blocks * BLOCKSIZE
1951
def _getmember(self, name, tarinfo=None):
    """Search the members backwards for one called name and return
       it, or None if there is none. If tarinfo is given, only the
       members in front of it are searched.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        candidates = members
    else:
        candidates = members[:members.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001967
def _load(self):
    """Call next() until it returns None, i.e. until no further
       readable member is found, then mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1977
def _check(self, mode=None):
    """Make sure the TarFile can be used for an operation.

       Raises IOError if the TarFile has been closed, or if mode is
       given and the TarFile's own mode does not occur in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        # No restriction on the mode was requested.
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001986
def __iter__(self):
    """Return an iterator over the members: a plain list iterator
       once all members have been loaded, a TarIter otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1994
def _dbg(self, level, msg):
    """Write msg as one line to sys.stderr, provided that level does
       not exceed the TarFile's debug setting.
    """
    if level > self.debug:
        return
    sys.stderr.write("%s\n" % (msg,))
2000# class TarFile
2001
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile.
        """
        self.tarfile = tarfile
        # Number of members handed out so far.
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, fetched with the TarFile's next()
           method. When that method runs out of members the TarFile
           is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            # Everything has been read already, serve from the list.
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2037
2038# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of size bytes that
       starts at offset.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The start is part of the section, the end is not.
        start = self.offset
        return start <= offset < start + self.size
2047
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it stores realpos, the value that
       TarFile.proc_sparse() accumulates from the sizes of the data
       sections in front of it.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos
2054
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section and exists to be told apart from _data.
    """
2059
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       The position of the most recent hit is kept in self.idx and
       the next search starts from there.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching circularly from the last
           hit, that contains offset. Return None if no item does,
           which includes the case of an empty buffer.
        """
        count = len(self)
        if not count:
            # Nothing stored: indexing would raise an IndexError, so
            # report "not found" the same way as below.
            return None
        start = self.idx
        for step in range(count):
            idx = (start + step) % count
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
        # End of File
        return None
2080
2081#---------------------------------------------
2082# zipfile compatible TarFile class
2083#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.

       The wrapped TarFile is available as self.tarfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED). Any other compression
           constant raises ValueError.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attributes a zipfile user expects
            # to find on a ZipInfo object.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [m.name for m in self.infolist()]
    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]
    def printdir(self):
        """Print a listing of the archive using TarFile.list().
        """
        self.tarfile.list()
    def testzip(self):
        """Present for zipfile compatibility only: nothing is checked
           and None is always returned.
        """
        return
    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the data of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted for zipfile compatibility and
           ignored.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add a member with the data bytes to the archive.

           zinfo supplies filename and date_time; it is updated in
           place and handed to TarFile.addfile().
           NOTE(review): zinfo presumably has to be a TarInfo-like
           object for addfile() to accept it -- confirm.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                # Last resort for interpreters that ship neither
                # StringIO flavour.
                from io import BytesIO as StringIO
        import calendar
        zinfo.name = zinfo.filename
        # The size must describe the data that is actually written,
        # as with zipfile.ZipFile.writestr(); a stale or missing
        # zinfo.file_size must not decide how much of it is stored.
        zinfo.file_size = len(bytes)
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2131
2132#--------------------
2133# exported functions
2134#--------------------
def is_tarfile(name):
    """Tell whether name points to a tar archive that this module
       can handle: True if it can be opened, False if opening it
       fails with a TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2145
# Module-level shortcut for TarFile.open. Note that this rebinds the name
# open for the whole module, so the builtin open() is shadowed here.
open = TarFile.open