blob: b5f9f3034655c5fd1c79600aed864fcb2e02acd3 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
if sys.platform == "mac":
    # Classic MacOS is not supported.  Many places in this module assume
    # that substituting "/" by the local os.path.sep is good enough to
    # convert pathnames, which does not work with the mac
    # rooted:path:name versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = "\0"                      # the null character (field padding/terminator)
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (type field is a NUL byte)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits (first column of the string built by filemode()).
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (rendered as "t"/"T" by filemode())

# Permission bits.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Return s as a string buffer of exactly length characters.

       Shorter strings are padded on the right with NUL characters,
       longer ones are cut off at length (and then carry no
       terminating NUL).
    """
    return s[:length].ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nti(s):
    """Decode the content of a header number field into a python number.
    """
    # A number field comes in one of two encodings, see itn() below:
    # a leading 0200 byte marks the big-endian binary form, anything
    # else is a string of octal digits.
    if s[0] == "\200":
        value = 0
        for char in s[1:]:
            value = (value << 8) + ord(char)
        return value
    # Octal form: drop the NUL/space terminators, an empty field is zero.
    return int(s.rstrip(NUL + " ") or "0", 8)
155
def itn(n, digits=8, posix=False):
    """Encode the python number n as a number field of digits bytes.

       Raises ValueError if posix is true and n does not fit into the
       octal representation.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, add the marker byte
    # and turn the whole thing around.
    field = []
    for _ in range(digits - 1):
        field.append(chr(n & 0xFF))
        n >>= 8
    field.append("\200")
    field.reverse()
    return "".join(field)
182
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of the header block buf.

       The checksum is the sum of all characters of the 512 byte header
       with the 8 byte chksum field counted as if it was filled with
       spaces. According to the GNU tar sources, some tars (Sun and
       NeXT) sum up signed chars, which gives a different result as
       soon as a character has the high bit set, so both flavours are
       calculated.
    """
    header = buf[:512]
    # "8x" skips the chksum field at offset 148; the eight spaces it
    # stands for contribute 8 * 32 = 256.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000195
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
       Raises IOError if src runs dry before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes, a short read means premature EOF.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    bufsize = 16 * 1024
    pending, tail = divmod(length, bufsize)
    while pending > 0:
        transfer(bufsize)
        pending -= 1

    if tail != 0:
        transfer(tail)
    return
220
# One entry per output column of filemode(). Each entry lists
# (bits, character) alternatives, the first one whose bits are all
# set in the mode wins.
filemode_table = (
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    columns = []
    for alternatives in filemode_table:
        # "-" unless one of the alternatives for this column matches
        symbol = "-"
        for bit, char in alternatives:
            if mode & bit == bit:
                symbol = char
                break
        columns.append(symbol)
    return "".join(columns)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000262
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        # tar member names always use "/" as separator
        return os.path.normpath(path).replace(os.sep, "/")
267
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
283
284#---------------------------
285# internal stream interface
286#---------------------------
287class _LowLevelFile:
288 """Low-level file object. Supports reading and writing.
289 It is used instead of a regular file object for streaming
290 access.
291 """
292
293 def __init__(self, name, mode):
294 mode = {
295 "r": os.O_RDONLY,
296 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
297 }[mode]
298 if hasattr(os, "O_BINARY"):
299 mode |= os.O_BINARY
300 self.fd = os.open(name, mode)
301
302 def close(self):
303 os.close(self.fd)
304
305 def read(self, size):
306 return os.read(self.fd, size)
307
308 def write(self, s):
309 os.write(self.fd, s)
310
311class _Stream:
312 """Class that serves as an adapter between TarFile and
313 a stream-like object. The stream-like object only
314 needs to have a read() or write() method and is accessed
315 blockwise. Use of gzip or bzip2 compression is possible.
316 A stream-like object could be for example: sys.stdin,
317 sys.stdout, a socket, a tape device etc.
318
319 _Stream is intended to be used only internally.
320 """
321
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000322 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000323 """Construct a _Stream object.
324 """
325 self._extfileobj = True
326 if fileobj is None:
327 fileobj = _LowLevelFile(name, mode)
328 self._extfileobj = False
329
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000330 if comptype == '*':
331 # Enable transparent compression detection for the
332 # stream interface
333 fileobj = _StreamProxy(fileobj)
334 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000335
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000336 self.name = name or ""
337 self.mode = mode
338 self.comptype = comptype
339 self.fileobj = fileobj
340 self.bufsize = bufsize
341 self.buf = ""
342 self.pos = 0L
343 self.closed = False
344
345 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000346 try:
347 import zlib
348 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000349 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000350 self.zlib = zlib
351 self.crc = zlib.crc32("")
352 if mode == "r":
353 self._init_read_gz()
354 else:
355 self._init_write_gz()
356
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000357 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358 try:
359 import bz2
360 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000361 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000362 if mode == "r":
363 self.dbuf = ""
364 self.cmp = bz2.BZ2Decompressor()
365 else:
366 self.cmp = bz2.BZ2Compressor()
367
368 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000369 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000370 self.close()
371
372 def _init_write_gz(self):
373 """Initialize for writing with gzip compression.
374 """
375 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
376 -self.zlib.MAX_WBITS,
377 self.zlib.DEF_MEM_LEVEL,
378 0)
379 timestamp = struct.pack("<L", long(time.time()))
380 self.__write("\037\213\010\010%s\002\377" % timestamp)
381 if self.name.endswith(".gz"):
382 self.name = self.name[:-3]
383 self.__write(self.name + NUL)
384
385 def write(self, s):
386 """Write string s to the stream.
387 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000388 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000389 self.crc = self.zlib.crc32(s, self.crc)
390 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000391 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000392 s = self.cmp.compress(s)
393 self.__write(s)
394
395 def __write(self, s):
396 """Write string s to the stream if a whole new block
397 is ready to be written.
398 """
399 self.buf += s
400 while len(self.buf) > self.bufsize:
401 self.fileobj.write(self.buf[:self.bufsize])
402 self.buf = self.buf[self.bufsize:]
403
404 def close(self):
405 """Close the _Stream object. No operation should be
406 done on it afterwards.
407 """
408 if self.closed:
409 return
410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000412 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000413
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000415 self.fileobj.write(self.buf)
416 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000417 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000418 # The native zlib crc is an unsigned 32-bit integer, but
419 # the Python wrapper implicitly casts that to a signed C
420 # long. So, on a 32-bit box self.crc may "look negative",
421 # while the same crc on a 64-bit box may "look positive".
422 # To avoid irksome warnings from the `struct` module, force
423 # it to look positive on all boxes.
424 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000425 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 if not self._extfileobj:
428 self.fileobj.close()
429
430 self.closed = True
431
432 def _init_read_gz(self):
433 """Initialize for reading a gzip compressed fileobj.
434 """
435 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
436 self.dbuf = ""
437
438 # taken from gzip.GzipFile with some alterations
439 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000440 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000441 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000442 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000443
444 flag = ord(self.__read(1))
445 self.__read(6)
446
447 if flag & 4:
448 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
449 self.read(xlen)
450 if flag & 8:
451 while True:
452 s = self.__read(1)
453 if not s or s == NUL:
454 break
455 if flag & 16:
456 while True:
457 s = self.__read(1)
458 if not s or s == NUL:
459 break
460 if flag & 2:
461 self.__read(2)
462
463 def tell(self):
464 """Return the stream's file pointer position.
465 """
466 return self.pos
467
468 def seek(self, pos=0):
469 """Set the stream's file pointer to pos. Negative seeking
470 is forbidden.
471 """
472 if pos - self.pos >= 0:
473 blocks, remainder = divmod(pos - self.pos, self.bufsize)
474 for i in xrange(blocks):
475 self.read(self.bufsize)
476 self.read(remainder)
477 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000478 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479 return self.pos
480
481 def read(self, size=None):
482 """Return the next size number of bytes from the stream.
483 If size is not defined, return all bytes of the stream
484 up to EOF.
485 """
486 if size is None:
487 t = []
488 while True:
489 buf = self._read(self.bufsize)
490 if not buf:
491 break
492 t.append(buf)
493 buf = "".join(t)
494 else:
495 buf = self._read(size)
496 self.pos += len(buf)
497 return buf
498
499 def _read(self, size):
500 """Return size bytes from the stream.
501 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000502 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503 return self.__read(size)
504
505 c = len(self.dbuf)
506 t = [self.dbuf]
507 while c < size:
508 buf = self.__read(self.bufsize)
509 if not buf:
510 break
511 buf = self.cmp.decompress(buf)
512 t.append(buf)
513 c += len(buf)
514 t = "".join(t)
515 self.dbuf = t[size:]
516 return t[:size]
517
518 def __read(self, size):
519 """Return size bytes from stream. If internal buffer is empty,
520 read another block from the stream.
521 """
522 c = len(self.buf)
523 t = [self.buf]
524 while c < size:
525 buf = self.fileobj.read(self.bufsize)
526 if not buf:
527 break
528 t.append(buf)
529 c += len(buf)
530 t = "".join(t)
531 self.buf = t[size:]
532 return t[:size]
533# class _Stream
534
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000535class _StreamProxy(object):
536 """Small proxy class that enables transparent compression
537 detection for the Stream interface (mode 'r|*').
538 """
539
540 def __init__(self, fileobj):
541 self.fileobj = fileobj
542 self.buf = self.fileobj.read(BLOCKSIZE)
543
544 def read(self, size):
545 self.read = self.fileobj.read
546 return self.buf
547
548 def getcomptype(self):
549 if self.buf.startswith("\037\213\010"):
550 return "gz"
551 if self.buf.startswith("BZh91"):
552 return "bz2"
553 return "tar"
554
555 def close(self):
556 self.fileobj.close()
557# class StreamProxy
558
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000559class _BZ2Proxy(object):
560 """Small proxy class that enables external file object
561 support for "r:bz2" and "w:bz2" modes. This is actually
562 a workaround for a limitation in bz2 module's BZ2File
563 class which (unlike gzip.GzipFile) has no support for
564 a file object argument.
565 """
566
567 blocksize = 16 * 1024
568
569 def __init__(self, fileobj, mode):
570 self.fileobj = fileobj
571 self.mode = mode
572 self.init()
573
574 def init(self):
575 import bz2
576 self.pos = 0
577 if self.mode == "r":
578 self.bz2obj = bz2.BZ2Decompressor()
579 self.fileobj.seek(0)
580 self.buf = ""
581 else:
582 self.bz2obj = bz2.BZ2Compressor()
583
584 def read(self, size):
585 b = [self.buf]
586 x = len(self.buf)
587 while x < size:
588 try:
589 raw = self.fileobj.read(self.blocksize)
590 data = self.bz2obj.decompress(raw)
591 b.append(data)
592 except EOFError:
593 break
594 x += len(data)
595 self.buf = "".join(b)
596
597 buf = self.buf[:size]
598 self.buf = self.buf[size:]
599 self.pos += len(buf)
600 return buf
601
602 def seek(self, pos):
603 if pos < self.pos:
604 self.init()
605 self.read(pos - self.pos)
606
607 def tell(self):
608 return self.pos
609
610 def write(self, data):
611 self.pos += len(data)
612 raw = self.bz2obj.compress(data)
613 self.fileobj.write(raw)
614
615 def close(self):
616 if self.mode == "w":
617 raw = self.bz2obj.flush()
618 self.fileobj.write(raw)
Georg Brandle8953182006-05-27 14:02:03 +0000619 self.fileobj.close()
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000620# class _BZ2Proxy
621
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000622#------------------------
623# Extraction file object
624#------------------------
625class ExFileObject(object):
626 """File-like object for reading an archive member.
627 Is returned by TarFile.extractfile(). Support for
628 sparse files included.
629 """
630
631 def __init__(self, tarfile, tarinfo):
632 self.fileobj = tarfile.fileobj
633 self.name = tarinfo.name
634 self.mode = "r"
635 self.closed = False
636 self.offset = tarinfo.offset_data
637 self.size = tarinfo.size
638 self.pos = 0L
639 self.linebuffer = ""
640 if tarinfo.issparse():
641 self.sparse = tarinfo.sparse
642 self.read = self._readsparse
643 else:
644 self.read = self._readnormal
645
646 def __read(self, size):
647 """Overloadable read method.
648 """
649 return self.fileobj.read(size)
650
651 def readline(self, size=-1):
652 """Read a line with approx. size. If size is negative,
653 read a whole line. readline() and read() must not
654 be mixed up (!).
655 """
656 if size < 0:
657 size = sys.maxint
658
659 nl = self.linebuffer.find("\n")
660 if nl >= 0:
661 nl = min(nl, size)
662 else:
663 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000664 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000665 buf = self.read(min(size, 100))
666 if not buf:
667 break
668 self.linebuffer += buf
669 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000670 nl = self.linebuffer.find("\n")
671 if nl == -1:
672 s = self.linebuffer
673 self.linebuffer = ""
674 return s
675 buf = self.linebuffer[:nl]
676 self.linebuffer = self.linebuffer[nl + 1:]
677 while buf[-1:] == "\r":
678 buf = buf[:-1]
679 return buf + "\n"
680
681 def readlines(self):
682 """Return a list with all (following) lines.
683 """
684 result = []
685 while True:
686 line = self.readline()
687 if not line: break
688 result.append(line)
689 return result
690
691 def _readnormal(self, size=None):
692 """Read operation for regular files.
693 """
694 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +0000695 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000696 self.fileobj.seek(self.offset + self.pos)
697 bytesleft = self.size - self.pos
698 if size is None:
699 bytestoread = bytesleft
700 else:
701 bytestoread = min(size, bytesleft)
702 self.pos += bytestoread
703 return self.__read(bytestoread)
704
705 def _readsparse(self, size=None):
706 """Read operation for sparse files.
707 """
708 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +0000709 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710
711 if size is None:
712 size = self.size - self.pos
713
714 data = []
715 while size > 0:
716 buf = self._readsparsesection(size)
717 if not buf:
718 break
719 size -= len(buf)
720 data.append(buf)
721 return "".join(data)
722
723 def _readsparsesection(self, size):
724 """Read a single section of a sparse file.
725 """
726 section = self.sparse.find(self.pos)
727
728 if section is None:
729 return ""
730
731 toread = min(size, section.offset + section.size - self.pos)
732 if isinstance(section, _data):
733 realpos = section.realpos + self.pos - section.offset
734 self.pos += toread
735 self.fileobj.seek(self.offset + realpos)
736 return self.__read(toread)
737 else:
738 self.pos += toread
739 return NUL * toread
740
741 def tell(self):
742 """Return the current file position.
743 """
744 return self.pos
745
746 def seek(self, pos, whence=0):
747 """Seek to a position in the file.
748 """
749 self.linebuffer = ""
750 if whence == 0:
751 self.pos = min(max(pos, 0), self.size)
752 if whence == 1:
753 if pos < 0:
754 self.pos = max(self.pos + pos, 0)
755 else:
756 self.pos = min(self.pos + pos, self.size)
757 if whence == 2:
758 self.pos = max(min(self.size + pos, self.size), 0)
759
760 def close(self):
761 """Close the file object.
762 """
763 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000764
765 def __iter__(self):
766 """Get an iterator over the file object.
767 """
768 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +0000769 raise ValueError("I/O operation on closed file")
Martin v. Löwisdf241532005-03-03 08:17:42 +0000770 return self
771
772 def next(self):
773 """Get the next item from the file iterator.
774 """
775 result = self.readline()
776 if not result:
777 raise StopIteration
778 return result
Tim Peterseba28be2005-03-28 01:08:02 +0000779
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000780#class ExFileObject
781
782#------------------
783# Exported Classes
784#------------------
785class TarInfo(object):
786 """Informational class which holds the details about an
787 archive member given by a tar header block.
788 TarInfo objects are returned by TarFile.getmember(),
789 TarFile.getmembers() and TarFile.gettarinfo() and are
790 usually created internally.
791 """
792
793 def __init__(self, name=""):
794 """Construct a TarInfo object. name is the optional name
795 of the member.
796 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000797 self.name = name # member name (dirnames must end with '/')
798 self.mode = 0666 # file permissions
799 self.uid = 0 # user id
800 self.gid = 0 # group id
801 self.size = 0 # file size
802 self.mtime = 0 # modification time
803 self.chksum = 0 # header checksum
804 self.type = REGTYPE # member type
805 self.linkname = "" # link name
806 self.uname = "user" # user name
807 self.gname = "group" # group name
808 self.devmajor = 0 # device major number
809 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000810
Georg Brandl38c6a222006-05-10 16:26:03 +0000811 self.offset = 0 # the tar header starts here
812 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000813
814 def __repr__(self):
815 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
816
Guido van Rossum75b64e62005-01-16 00:16:11 +0000817 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000818 def frombuf(cls, buf):
819 """Construct a TarInfo object from a 512 byte string buffer.
820 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000821 if len(buf) != BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000822 raise ValueError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000823 if buf.count(NUL) == BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000824 raise ValueError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000825
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000826 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000827 tarinfo.buf = buf
Georg Brandle8953182006-05-27 14:02:03 +0000828 tarinfo.name = buf[0:100].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000829 tarinfo.mode = nti(buf[100:108])
830 tarinfo.uid = nti(buf[108:116])
831 tarinfo.gid = nti(buf[116:124])
832 tarinfo.size = nti(buf[124:136])
833 tarinfo.mtime = nti(buf[136:148])
834 tarinfo.chksum = nti(buf[148:156])
835 tarinfo.type = buf[156:157]
Georg Brandle8953182006-05-27 14:02:03 +0000836 tarinfo.linkname = buf[157:257].rstrip(NUL)
837 tarinfo.uname = buf[265:297].rstrip(NUL)
838 tarinfo.gname = buf[297:329].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000839 tarinfo.devmajor = nti(buf[329:337])
840 tarinfo.devminor = nti(buf[337:345])
Georg Brandl3354f282006-10-29 09:16:12 +0000841 prefix = buf[345:500].rstrip(NUL)
842
843 if prefix and not tarinfo.issparse():
844 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000845
Georg Brandl38c6a222006-05-10 16:26:03 +0000846 if tarinfo.chksum not in calc_chksums(buf):
Georg Brandle4751e32006-05-18 06:11:19 +0000847 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000848 return tarinfo
849
Georg Brandl38c6a222006-05-10 16:26:03 +0000850 def tobuf(self, posix=False):
Georg Brandl3354f282006-10-29 09:16:12 +0000851 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000852 """
Georg Brandl3354f282006-10-29 09:16:12 +0000853 buf = ""
854 type = self.type
855 prefix = ""
856
857 if self.name.endswith("/"):
858 type = DIRTYPE
859
860 name = normpath(self.name)
861
862 if type == DIRTYPE:
863 # directories should end with '/'
864 name += "/"
865
866 linkname = self.linkname
867 if linkname:
868 # if linkname is empty we end up with a '.'
869 linkname = normpath(linkname)
870
871 if posix:
872 if self.size > MAXSIZE_MEMBER:
873 raise ValueError("file is too large (>= 8 GB)")
874
875 if len(self.linkname) > LENGTH_LINK:
876 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
877
878 if len(name) > LENGTH_NAME:
879 prefix = name[:LENGTH_PREFIX + 1]
880 while prefix and prefix[-1] != "/":
881 prefix = prefix[:-1]
882
883 name = name[len(prefix):]
884 prefix = prefix[:-1]
885
886 if not prefix or len(name) > LENGTH_NAME:
887 raise ValueError("name is too long")
888
889 else:
890 if len(self.linkname) > LENGTH_LINK:
891 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
892
893 if len(name) > LENGTH_NAME:
894 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
895
Georg Brandl38c6a222006-05-10 16:26:03 +0000896 parts = [
Georg Brandl3354f282006-10-29 09:16:12 +0000897 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000898 itn(self.mode & 07777, 8, posix),
899 itn(self.uid, 8, posix),
900 itn(self.gid, 8, posix),
901 itn(self.size, 12, posix),
902 itn(self.mtime, 12, posix),
903 " ", # checksum field
Georg Brandl3354f282006-10-29 09:16:12 +0000904 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000905 stn(self.linkname, 100),
906 stn(MAGIC, 6),
907 stn(VERSION, 2),
908 stn(self.uname, 32),
909 stn(self.gname, 32),
910 itn(self.devmajor, 8, posix),
911 itn(self.devminor, 8, posix),
Georg Brandl3354f282006-10-29 09:16:12 +0000912 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000913 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000914
Georg Brandl3354f282006-10-29 09:16:12 +0000915 buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
Georg Brandl38c6a222006-05-10 16:26:03 +0000916 chksum = calc_chksums(buf)[0]
Georg Brandl3354f282006-10-29 09:16:12 +0000917 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000918 self.buf = buf
919 return buf
920
Georg Brandl3354f282006-10-29 09:16:12 +0000921 def _create_gnulong(self, name, type):
922 """Create a GNU longname/longlink header from name.
923 It consists of an extended tar header, with the length
924 of the longname as size, followed by data blocks,
925 which contain the longname as a null terminated string.
926 """
927 name += NUL
928
929 tarinfo = self.__class__()
930 tarinfo.name = "././@LongLink"
931 tarinfo.type = type
932 tarinfo.mode = 0
933 tarinfo.size = len(name)
934
935 # create extended header
936 buf = tarinfo.tobuf()
937 # create name blocks
938 buf += name
939 blocks, remainder = divmod(len(name), BLOCKSIZE)
940 if remainder > 0:
941 buf += (BLOCKSIZE - remainder) * NUL
942 return buf
943
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000944 def isreg(self):
945 return self.type in REGULAR_TYPES
946 def isfile(self):
947 return self.isreg()
948 def isdir(self):
949 return self.type == DIRTYPE
950 def issym(self):
951 return self.type == SYMTYPE
def islnk(self):
    """Return True if the member's type equals LNKTYPE."""
    kind = self.type
    return kind == LNKTYPE
def ischr(self):
    """Return True if the member's type equals CHRTYPE."""
    kind = self.type
    return kind == CHRTYPE
def isblk(self):
    """Return True if the member's type equals BLKTYPE."""
    kind = self.type
    return kind == BLKTYPE
def isfifo(self):
    """Return True if the member's type equals FIFOTYPE."""
    kind = self.type
    return kind == FIFOTYPE
def issparse(self):
    """Return True if the member's type equals GNUTYPE_SPARSE."""
    kind = self.type
    return kind == GNUTYPE_SPARSE
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
964# class TarInfo
965
966class TarFile(object):
967 """The TarFile Class provides an interface to tar archives.
968 """
969
970 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
971
972 dereference = False # If true, add content of linked file to the
973 # tar file, else the link.
974
975 ignore_zeros = False # If true, skips empty or invalid blocks and
976 # continues processing.
977
978 errorlevel = 0 # If 0, fatal errors only appear in debug
979 # messages (if debug >= 0). If > 0, errors
980 # are passed to the caller as exceptions.
981
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000982 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000983 # archives (no GNU extensions!)
984
985 fileobject = ExFileObject
986
987 def __init__(self, name=None, mode="r", fileobj=None):
988 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
989 read from an existing archive, 'a' to append data to an existing
990 file or 'w' to create a new file overwriting an existing one. `mode'
991 defaults to 'r'.
992 If `fileobj' is given, it is used for reading or writing data. If it
993 can be determined, `mode' is overridden by `fileobj's mode.
994 `fileobj' is not closed, when TarFile is closed.
995 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000996 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000997
998 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +0000999 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001000 self._mode = mode
1001 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1002
1003 if not fileobj:
1004 fileobj = file(self.name, self.mode)
1005 self._extfileobj = False
1006 else:
1007 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001008 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001009 if hasattr(fileobj, "mode"):
1010 self.mode = fileobj.mode
1011 self._extfileobj = True
1012 self.fileobj = fileobj
1013
1014 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +00001015 self.closed = False
1016 self.members = [] # list of members as TarInfo objects
1017 self._loaded = False # flag if all members have been read
1018 self.offset = 0L # current position in the archive file
1019 self.inodes = {} # dictionary caching the inodes of
1020 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001021
1022 if self._mode == "r":
1023 self.firstmember = None
1024 self.firstmember = self.next()
1025
1026 if self._mode == "a":
1027 # Move to the end of the archive,
1028 # before the first empty block.
1029 self.firstmember = None
1030 while True:
1031 try:
1032 tarinfo = self.next()
1033 except ReadError:
1034 self.fileobj.seek(0)
1035 break
1036 if tarinfo is None:
1037 self.fileobj.seek(- BLOCKSIZE, 1)
1038 break
1039
1040 if self._mode in "aw":
1041 self._loaded = True
1042
1043 #--------------------------------------------------------------------------
1044 # Below are the classmethods which act as alternate constructors to the
1045 # TarFile class. The open() method is the only one that is needed for
1046 # public use; it is the "super"-constructor and is able to select an
1047 # adequate "sub"-constructor for a particular compression using the mapping
1048 # from OPEN_METH.
1049 #
1050 # This concept allows one to subclass TarFile without losing the comfort of
1051 # the super-constructor. A sub-constructor is registered and made available
1052 # by adding it to the mapping in OPEN_METH.
1053
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError for a missing target or an unrecognised mode,
       CompressionError for an unknown compression type and ReadError
       if no registered opener accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # Each failed attempt may have consumed data from a caller
        # supplied fileobj, so remember where it started and rewind
        # before trying the next opener.
        if fileobj is not None:
            saved_pos = fileobj.tell()
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    # Compare against the exact mode strings: a substring test would
    # also accept "" and "aw".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001121
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open the uncompressed tar archive `name' for reading, appending
       or writing by instantiating the class with the given arguments.
       Raises ValueError if `mode' fails the 'r'/'a'/'w' check.
    """
    acceptable = len(mode) <= 1 and mode in "raw"
    if not acceptable:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1129
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None when `fileobj' is given. Raises ValueError
       for a bad mode, CompressionError if the gzip module is unusable
       and ReadError if an IOError occurs while opening the archive.
       A file opened here is closed again if opening fails.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the contained tar file from the archive name.
    # Without a name (fileobj only) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure: still release the file we opened.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1166
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None when `fileobj' is given. Raises ValueError
       for a bad mode, CompressionError if the bz2 module cannot be
       imported and ReadError if an IOError occurs while opening the
       archive. A BZ2File created here is closed again if opening fails.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the contained tar file from the archive name.
    # Without a name (fileobj only) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    if fileobj is not None:
        fileobj = _BZ2Proxy(fileobj, mode)
        opened_here = False
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
        opened_here = True

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure: still release the file we opened.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1199
# Registry of the *open() alternate constructors: maps a compression
# type name to the name of the classmethod that handles it.
OPEN_METH = dict(
    tar="taropen",      # uncompressed tar
    gz="gzopen",        # gzip compressed tar
    bz2="bz2open",      # bzip2 compressed tar
)
1206
1207 #--------------------------------------------------------------------------
1208 # The public methods which TarFile provides:
1209
def close(self):
    """Close the TarFile. When opened for writing or appending, two
       zero blocks are written first and the archive is padded with
       NULs up to a multiple of RECORDSIZE. The underlying file object
       is closed unless it was supplied by the caller. Calling close()
       on an already closed TarFile does nothing.
    """
    if self.closed:
        return

    if self._mode in "aw":
        end_marker = BLOCKSIZE * 2
        self.fileobj.write(NUL * end_marker)
        self.offset += end_marker
        # fill up the end with zero-blocks
        # (like option -b20 for tar does)
        overhang = self.offset % RECORDSIZE
        if overhang > 0:
            self.fileobj.write(NUL * (RECORDSIZE - overhang))

    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1229
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001240
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires scanning the whole archive once.
    self._load()
    return self.members
1250
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001256
1257 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1258 """Create a TarInfo object for either the file `name' or the file
1259 object `fileobj' (using os.fstat on its file descriptor). You can
1260 modify some of the TarInfo's attributes before you add it using
1261 addfile(). If given, `arcname' specifies an alternative name for the
1262 file in the archive.
1263 """
1264 self._check("aw")
1265
1266 # When fileobj is given, replace name by
1267 # fileobj's real name.
1268 if fileobj is not None:
1269 name = fileobj.name
1270
1271 # Building the name of the member in the archive.
1272 # Backward slashes are converted to forward slashes,
1273 # Absolute paths are turned to relative paths.
1274 if arcname is None:
1275 arcname = name
1276 arcname = normpath(arcname)
1277 drv, arcname = os.path.splitdrive(arcname)
1278 while arcname[0:1] == "/":
1279 arcname = arcname[1:]
1280
1281 # Now, fill the TarInfo object with
1282 # information specific for the file.
1283 tarinfo = TarInfo()
1284
1285 # Use os.stat or os.lstat, depending on platform
1286 # and if symlinks shall be resolved.
1287 if fileobj is None:
1288 if hasattr(os, "lstat") and not self.dereference:
1289 statres = os.lstat(name)
1290 else:
1291 statres = os.stat(name)
1292 else:
1293 statres = os.fstat(fileobj.fileno())
1294 linkname = ""
1295
1296 stmd = statres.st_mode
1297 if stat.S_ISREG(stmd):
1298 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001299 if not self.dereference and \
1300 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001301 # Is it a hardlink to an already
1302 # archived file?
1303 type = LNKTYPE
1304 linkname = self.inodes[inode]
1305 else:
1306 # The inode is added only if its valid.
1307 # For win32 it is always 0.
1308 type = REGTYPE
1309 if inode[0]:
1310 self.inodes[inode] = arcname
1311 elif stat.S_ISDIR(stmd):
1312 type = DIRTYPE
1313 if arcname[-1:] != "/":
1314 arcname += "/"
1315 elif stat.S_ISFIFO(stmd):
1316 type = FIFOTYPE
1317 elif stat.S_ISLNK(stmd):
1318 type = SYMTYPE
1319 linkname = os.readlink(name)
1320 elif stat.S_ISCHR(stmd):
1321 type = CHRTYPE
1322 elif stat.S_ISBLK(stmd):
1323 type = BLKTYPE
1324 else:
1325 return None
1326
1327 # Fill the TarInfo object with all
1328 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001329 tarinfo.name = arcname
1330 tarinfo.mode = stmd
1331 tarinfo.uid = statres.st_uid
1332 tarinfo.gid = statres.st_gid
1333 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001334 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001335 else:
1336 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001337 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001338 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001339 tarinfo.linkname = linkname
1340 if pwd:
1341 try:
1342 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1343 except KeyError:
1344 pass
1345 if grp:
1346 try:
1347 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1348 except KeyError:
1349 pass
1350
1351 if type in (CHRTYPE, BLKTYPE):
1352 if hasattr(os, "major") and hasattr(os, "minor"):
1353 tarinfo.devmajor = os.major(statres.st_rdev)
1354 tarinfo.devminor = os.minor(statres.st_rdev)
1355 return tarinfo
1356
1357 def list(self, verbose=True):
1358 """Print a table of contents to sys.stdout. If `verbose' is False, only
1359 the names of the members are printed. If it is True, an `ls -l'-like
1360 output is produced.
1361 """
1362 self._check()
1363
1364 for tarinfo in self:
1365 if verbose:
1366 print filemode(tarinfo.mode),
1367 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1368 tarinfo.gname or tarinfo.gid),
1369 if tarinfo.ischr() or tarinfo.isblk():
1370 print "%10s" % ("%d,%d" \
1371 % (tarinfo.devmajor, tarinfo.devminor)),
1372 else:
1373 print "%10d" % tarinfo.size,
1374 print "%d-%02d-%02d %02d:%02d:%02d" \
1375 % time.localtime(tarinfo.mtime)[:6],
1376
1377 print tarinfo.name,
1378
1379 if verbose:
1380 if tarinfo.issym():
1381 print "->", tarinfo.linkname,
1382 if tarinfo.islnk():
1383 print "link to", tarinfo.linkname,
1384 print
1385
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        # Close the source file even if writing the member fails.
        try:
            self.addfile(tarinfo, f)
        finally:
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1437
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001463
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    directories = []

    if members is None:
        members = self

    # rwx for user, group and others (octal 777).
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable: rebinding `path' here would make
        # every later directory resolve relative to the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                # The active exception is fetched explicitly so this
                # handler needs no exception-binding syntax.
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1504
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001505 def extract(self, member, path=""):
1506 """Extract a member from the archive to the current working directory,
1507 using its full name. Its file information is extracted as accurately
1508 as possible. `member' may be a filename or a TarInfo object. You can
1509 specify a different directory using `path'.
1510 """
1511 self._check("r")
1512
1513 if isinstance(member, TarInfo):
1514 tarinfo = member
1515 else:
1516 tarinfo = self.getmember(member)
1517
Neal Norwitza4f651a2004-07-20 22:07:44 +00001518 # Prepare the link target for makelink().
1519 if tarinfo.islnk():
1520 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1521
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001522 try:
1523 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1524 except EnvironmentError, e:
1525 if self.errorlevel > 0:
1526 raise
1527 else:
1528 if e.filename is None:
1529 self._dbg(1, "tarfile: %s" % e.strerror)
1530 else:
1531 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1532 except ExtractError, e:
1533 if self.errorlevel > 1:
1534 raise
1535 else:
1536 self._dbg(1, "tarfile: %s" % e)
1537
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of an unknown type (which are treated
    # as regular files), are served straight from the archive.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        if isinstance(self.fileobj, _Stream):
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
1576
1577 def _extract_member(self, tarinfo, targetpath):
1578 """Extract the TarInfo object tarinfo to a physical
1579 file called targetpath.
1580 """
1581 # Fetch the TarInfo object for the given name
1582 # and build the destination pathname, replacing
1583 # forward slashes to platform specific separators.
1584 if targetpath[-1:] == "/":
1585 targetpath = targetpath[:-1]
1586 targetpath = os.path.normpath(targetpath)
1587
1588 # Create all upper directories.
1589 upperdirs = os.path.dirname(targetpath)
1590 if upperdirs and not os.path.exists(upperdirs):
1591 ti = TarInfo()
1592 ti.name = upperdirs
1593 ti.type = DIRTYPE
1594 ti.mode = 0777
1595 ti.mtime = tarinfo.mtime
1596 ti.uid = tarinfo.uid
1597 ti.gid = tarinfo.gid
1598 ti.uname = tarinfo.uname
1599 ti.gname = tarinfo.gname
1600 try:
1601 self._extract_member(ti, ti.name)
1602 except:
1603 pass
1604
1605 if tarinfo.islnk() or tarinfo.issym():
1606 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1607 else:
1608 self._dbg(1, tarinfo.name)
1609
1610 if tarinfo.isreg():
1611 self.makefile(tarinfo, targetpath)
1612 elif tarinfo.isdir():
1613 self.makedir(tarinfo, targetpath)
1614 elif tarinfo.isfifo():
1615 self.makefifo(tarinfo, targetpath)
1616 elif tarinfo.ischr() or tarinfo.isblk():
1617 self.makedev(tarinfo, targetpath)
1618 elif tarinfo.islnk() or tarinfo.issym():
1619 self.makelink(tarinfo, targetpath)
1620 elif tarinfo.type not in SUPPORTED_TYPES:
1621 self.makeunknown(tarinfo, targetpath)
1622 else:
1623 self.makefile(tarinfo, targetpath)
1624
1625 self.chown(tarinfo, targetpath)
1626 if not tarinfo.issym():
1627 self.chmod(tarinfo, targetpath)
1628 self.utime(tarinfo, targetpath)
1629
1630 #--------------------------------------------------------------------------
1631 # Below are the different file methods. They are called via
1632 # _extract_member() when extract() is called. They can be replaced in a
1633 # subclass to implement other functionality.
1634
1635 def makedir(self, tarinfo, targetpath):
1636 """Make a directory called targetpath.
1637 """
1638 try:
1639 os.mkdir(targetpath)
1640 except EnvironmentError, e:
1641 if e.errno != errno.EEXIST:
1642 raise
1643
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath by copying the member's data
       out of the archive. Both the source and the target file object
       are closed even if opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1652
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is written like a regular file and
       a debug message records the unknown type.
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, extracted as regular file."
            % tarinfo.type)
    self._dbg(1, note)
1660
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. Raises ExtractError if the
       os module provides no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001668
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath. Raises
       ExtractError if the os module lacks mknod() or makedev().
    """
    supported = hasattr(os, "mknod") and hasattr(os, "makedev")
    if not supported:
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode bits with the device type flag.
    if tarinfo.isblk():
        mode = tarinfo.mode | stat.S_IFBLK
    else:
        mode = tarinfo.mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1683
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        # Fall through to the copy-based fallback below.
        pass

    if tarinfo.issym():
        # Re-anchor the link name at the member's own directory.
        linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                         linkpath))

    try:
        # First choice: extract the referenced member in its place.
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        # Second choice: copy the referenced file from the filesystem.
        linkpath = os.path.normpath(linkpath)
        try:
            shutil.copy2(linkpath, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001710
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is done
       unless the pwd module is available and the effective uid is 0.
       Raises ExtractError if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then our own gid.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # Resolve the user the same way.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001738
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
       Does nothing if the os module has no chmod(); raises
       ExtractError if the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001747
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.
       Does nothing if the os module has no utime(); raises
       ExtractError if the call fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    on_windows = sys.platform == "win32"
    if on_windows and tarinfo.isdir():
        return
    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001761
1762 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       A pending self.firstmember is handed out first. Otherwise header
       blocks are read starting at self.offset. A block that raises
       ValueError while being parsed is skipped when self.ignore_zeros
       is set; if it is not set, ReadError is raised when self.offset
       is still 0 and None is returned in every other case. Each
       member returned from a parsed header is appended to
       self.members.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError:
            # Fetch the active exception for the diagnostic messages below.
            e = sys.exc_info()[1]
            if self.ignore_zeros:
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have exactly one '/' at the end. The name
    # may already carry one (see the case above), so strip before
    # appending instead of producing "dir//".
    if tarinfo.isdir():
        tarinfo.name = tarinfo.name.rstrip("/") + "/"

    self.members.append(tarinfo)
    return tarinfo
1816
1817 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001818 # The following are methods that are called depending on the type of a
1819 # member. The entry point is proc_member() which is called with a TarInfo
1820 # object created from the header block from the current offset. The
1821 # proc_member() method can be overridden in a subclass to add custom
1822 # proc_*() methods. A proc_*() method MUST implement the following
1823 # operations:
1824 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1825 # if there is data that follows.
1826 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001827 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001828 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method matching its type and
       return that method's result.

       GNU longname/longlink members go to proc_gnulong(), GNU sparse
       members to proc_sparse(), everything else to proc_builtin().
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1839
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type
       which is then treated like a regular file.

       tarinfo.offset_data is set to the current self.offset. For
       regular files and unsupported types self.offset is advanced
       past the member's data blocks. Returns tarinfo.
    """
    tarinfo.offset_data = self.offset
    if not tarinfo.isreg() and tarinfo.type in SUPPORTED_TYPES:
        # No data blocks to skip for this member.
        return tarinfo
    # Skip the following data blocks.
    self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001849
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member.

       The data blocks following tarinfo hold the long string. They
       are read, then the next header is parsed and processed, and the
       resulting TarInfo gets the long string (stripped of trailing
       NULs) as its name or linkname. Returns that TarInfo.
    """
    # Collect the blocks that carry the long string.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longvalue = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    kind = tarinfo.type
    if kind == GNUTYPE_LONGNAME:
        member.name = longvalue.rstrip(NUL)
    elif kind == GNUTYPE_LONGLINK:
        member.linkname = longvalue.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001878
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       The sparse structs in tarinfo.buf, and in any extended header
       blocks that follow, are turned into a _ringbuffer of _data and
       _hole sections stored in tarinfo.sparse. tarinfo.offset_data is
       set to the start of the stored data, self.offset is moved past
       it, and tarinfo.size is replaced by the original size read from
       the header. Returns tarinfo.
    """
    sp = _ringbuffer()

    def collect(block, pos, count, lastpos, realpos):
        # Parse up to count 24-byte sparse structs (12 bytes offset,
        # 12 bytes numbytes) from block starting at pos, appending the
        # matching _hole/_data sections to sp. Parsing stops at the
        # first struct nti() rejects. Returns the updated
        # (lastpos, realpos) pair.
        for _ in xrange(count):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        return lastpos, realpos

    buf = tarinfo.buf

    # There are 4 possible sparse structs in the
    # first header.
    lastpos, realpos = collect(buf, 386, 4, 0, 0)

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        # An extended header block holds up to 21 structs.
        lastpos, realpos = collect(buf, 0, 21, lastpos, realpos)
        isextended = ord(buf[504])

    # Pad with a trailing hole up to the original file size.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001935
1936 #--------------------------------------------------------------------------
1937 # Little helper methods:
1938
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    # Bytes missing to fill the last, partially used block.
    padding = -count % BLOCKSIZE
    return count + padding
1947
def _getmember(self, name, tarinfo=None):
    """Search the archive members backwards for one called name.

       When tarinfo is given, only the members that come before it
       are searched. Returns the matching member or None.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is not None:
        stop = members.index(tarinfo)
    else:
        stop = len(members)

    for candidate in reversed(members[:stop]):
        if name == candidate.name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001963
def _load(self):
    """Walk the whole archive by calling next() until it returns
       None, then mark the TarFile as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1973
def _check(self, mode=None):
    """Verify that the TarFile is still open and, when mode is given,
       that the TarFile's own mode is one of the characters in mode.
       Raise IOError otherwise.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001982
def __iter__(self):
    """Return an iterator over the archive members: a plain list
       iterator once everything is loaded, a TarIter otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1990
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level exceeds self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1996# class TarFile
1997
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile to iterate over and start at index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, raising StopIteration at the end.
           Members come from TarFile's next() method until the TarFile
           is marked _loaded, which this method does once next() runs
           dry.
        """
        source = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Once the
        # TarFile is loaded, continue from its member list instead.
        if source._loaded:
            try:
                member = source.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = source.next()
            if not member:
                source._loaded = True
                raise StopIteration
        self.index += 1
        return member
2033
2034# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a span of size bytes
       starting at offset.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size
    def __contains__(self, offset):
        # True when offset lies in [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
2043
class _data(_section):
    """A section of a sparse file that holds actual data; realpos
       is stored alongside the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2050
class _hole(_section):
    """A section of a sparse file that is a hole (no stored data).
    """
2055
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts searching at the position of the previous hit
       (self.idx) and wraps around.
    """
    def __init__(self):
        self.idx = 0
    def find(self, offset):
        """Return the first item, searching circularly from self.idx,
           for which `offset in item` is true, and remember its index.
           Return None if no item matches or the buffer is empty.
        """
        total = len(self)
        if not total:
            # Nothing stored: report "not found" instead of raising
            # IndexError on the first lookup.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == total:
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2076
2077#---------------------------------------------
2078# zipfile compatible TarFile class
2079#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper that offers the interface of the standard module
       zipfile's ZipFile class on top of a TarFile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen (TAR_PLAIN) or TarFile.gzopen
           (TAR_GZIPPED); any other compression raises ValueError. In
           read mode every member gets zipfile-style filename,
           file_size and date_time attributes.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [member.name for member in self.infolist()]
    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        """Delegate to the TarFile's list() method."""
        self.tarfile.list()
    def testzip(self):
        """Do nothing and return None."""
        return
    def getinfo(self, name):
        """Return the TarFile's member called name."""
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the complete contents of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname; compress_type is
           accepted but not used.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add the string bytes as a member described by zinfo, after
           copying zinfo's zipfile-style fields to name, size and mtime.
        """
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2126#class TarFileCompat
2127
2128#--------------------
2129# exported functions
2130#--------------------
def is_tarfile(name):
    """Tell whether name can be opened as a tar archive by this
       module: True if opening and closing it succeeds, False if a
       TarError is raised.
    """
    try:
        archive = open(name)
        archive.close()
    except TarError:
        return False
    return True
2141
2142open = TarFile.open