blob: 8b477fe76e79f1cca9322342680516d32a88f75d [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl2527f7f2006-10-29 09:16:15 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
# Refuse to load on classic MacOS (sys.platform == 'mac').
# This module needs work for MacOS9, especially in the area of pathname
# handling. In many places it is assumed a simple substitution of / by the
# local os.path.sep is good enough to convert pathnames, but this does not
# work with the mac rooted:path:name versus :nonrooted:path:name syntax
if sys.platform == 'mac':
    raise ImportError("tarfile does not work for platform==mac")
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
# Basic building blocks of the on-disk format. Headers are processed in
# units of BLOCKSIZE bytes; a record is 20 such blocks (10240 bytes).
NUL = "\0"                              # the null character
BLOCKSIZE = 512                         # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20             # length of records
MAGIC = "ustar"                         # magic tar string
VERSION = "00"                          # version number

# Size limits of individual header fields.
LENGTH_NAME = 100                       # maximum length of a filename
LENGTH_LINK = 100                       # maximum length of a linkname
LENGTH_PREFIX = 155                     # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L          # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header (the byte that
# TarInfo.frombuf() reads from offset 156).
REGTYPE = "0"                           # regular file
AREGTYPE = "\0"                         # regular file
LNKTYPE = "1"                           # link (inside tarfile)
SYMTYPE = "2"                           # symbolic link
CHRTYPE = "3"                           # character special device
BLKTYPE = "4"                           # block special device
DIRTYPE = "5"                           # directory
FIFOTYPE = "6"                          # fifo special device
CONTTYPE = "7"                          # contiguous file

# Type values introduced by GNU tar.
GNUTYPE_LONGNAME = "L"                  # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"                  # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                    # GNU tar extension for sparse file
96
97#---------------------------------------------------------
98# tarfile constants
99#---------------------------------------------------------
# Groupings of the type values defined above, for membership tests.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,          # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,          # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,                     # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)              # represent regular files
108
109#---------------------------------------------------------
110# Bits used in the mode field, values in octal.
111#---------------------------------------------------------
# File-type values; filemode_table uses them to pick the leading
# character of the string built by filemode().
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special bits; filemode_table combines them with the execute bits
# to produce "s"/"S" and "t"/"T".
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Permission bits for owner, group and other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Convert a python string to a fixed-width string buffer.

       The result is always exactly `length` characters long: a longer
       s is cut off, a shorter s is padded on the right with NUL.
    """
    return s[:length].ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nts(s):
    """Convert a null-terminated string field to a python string.

       Everything from the first null character on is dropped; a field
       without a null character is returned in full.
    """
    # Splitting once at the first NUL leaves the leading part in slot 0,
    # which is the whole string when no NUL is present.
    return s.split("\0", 1)[0]
150
def nti(s):
    """Convert a number field to a python number.

       s is the raw content of a numeric header field. Two encodings
       are understood (see itn()):

       - a string of octal digits, optionally terminated by a null
         character and optionally surrounded by blanks; an empty or
         all-blank field counts as 0,
       - a leading 0200 (0x80) byte followed by a big-endian base-256
         number.

       Raises ValueError if an octal field contains anything else.
    """
    # s[:1] rather than s[0]: an empty field must not raise IndexError,
    # it simply takes the octal branch and decodes as 0.
    if s[:1] != chr(0x80):
        # int() copes with blanks around the digits but not with a field
        # made of blanks only, so strip before falling back to "0".
        n = int(nts(s).strip() or "0", 8)
    else:
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
    return n
164
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field of `digits` characters.

       Raises ValueError if posix is true and n does not fit into the
       octal representation.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the payload least significant byte first, then flip it
    # into big-endian order behind the 0200 marker byte.
    octets = []
    for _ in xrange(width):
        octets.append(chr(n & 0xFF))
        n >>= 8
    octets.reverse()
    return chr(0x80) + "".join(octets)
191
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum). Only the
       first 512 bytes of buf are looked at.
    """
    header = buf[:512]
    # "8x" skips the chksum field (bytes 148..155); the constant 256
    # stands in for it, being the sum of eight spaces (8 * 32).
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000204
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs dry before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means src is truncated.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    full_chunks, tail = divmod(length, BUFSIZE)
    while full_chunks > 0:
        transfer(BUFSIZE)
        full_chunks -= 1

    if tail != 0:
        transfer(tail)
    return
229
# Lookup table for filemode(). Every inner tuple stands for one character
# of the resulting string, in output order: file type, then read, write
# and execute for owner, group and other. Within an inner tuple the
# (bits, char) pairs are tried from top to bottom and the first pair whose
# bits are all set in the mode supplies the character, which is why the
# combined entries (e.g. TUEXEC|TSUID) come before the single-bit ones.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000256
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" is what remains when none of the candidates matches.
        symbol = "-"
        for mask, flag in candidates:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000271
# normpath() always hands back names with "/" as the separator, whatever
# the separator of the local platform is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
276
class TarError(Exception):
    """Base class of all exceptions defined in this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
292
293#---------------------------
294# internal stream interface
295#---------------------------
296class _LowLevelFile:
297 """Low-level file object. Supports reading and writing.
298 It is used instead of a regular file object for streaming
299 access.
300 """
301
302 def __init__(self, name, mode):
303 mode = {
304 "r": os.O_RDONLY,
305 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
306 }[mode]
307 if hasattr(os, "O_BINARY"):
308 mode |= os.O_BINARY
309 self.fd = os.open(name, mode)
310
311 def close(self):
312 os.close(self.fd)
313
314 def read(self, size):
315 return os.read(self.fd, size)
316
317 def write(self, s):
318 os.write(self.fd, s)
319
320class _Stream:
321 """Class that serves as an adapter between TarFile and
322 a stream-like object. The stream-like object only
323 needs to have a read() or write() method and is accessed
324 blockwise. Use of gzip or bzip2 compression is possible.
325 A stream-like object could be for example: sys.stdin,
326 sys.stdout, a socket, a tape device etc.
327
328 _Stream is intended to be used only internally.
329 """
330
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000331 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000332 """Construct a _Stream object.
333 """
334 self._extfileobj = True
335 if fileobj is None:
336 fileobj = _LowLevelFile(name, mode)
337 self._extfileobj = False
338
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000339 if comptype == '*':
340 # Enable transparent compression detection for the
341 # stream interface
342 fileobj = _StreamProxy(fileobj)
343 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000344
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000345 self.name = name or ""
346 self.mode = mode
347 self.comptype = comptype
348 self.fileobj = fileobj
349 self.bufsize = bufsize
350 self.buf = ""
351 self.pos = 0L
352 self.closed = False
353
354 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000355 try:
356 import zlib
357 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000358 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000359 self.zlib = zlib
360 self.crc = zlib.crc32("")
361 if mode == "r":
362 self._init_read_gz()
363 else:
364 self._init_write_gz()
365
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000366 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000367 try:
368 import bz2
369 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000370 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000371 if mode == "r":
372 self.dbuf = ""
373 self.cmp = bz2.BZ2Decompressor()
374 else:
375 self.cmp = bz2.BZ2Compressor()
376
377 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000378 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000379 self.close()
380
381 def _init_write_gz(self):
382 """Initialize for writing with gzip compression.
383 """
384 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
385 -self.zlib.MAX_WBITS,
386 self.zlib.DEF_MEM_LEVEL,
387 0)
388 timestamp = struct.pack("<L", long(time.time()))
389 self.__write("\037\213\010\010%s\002\377" % timestamp)
390 if self.name.endswith(".gz"):
391 self.name = self.name[:-3]
392 self.__write(self.name + NUL)
393
394 def write(self, s):
395 """Write string s to the stream.
396 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000397 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000398 self.crc = self.zlib.crc32(s, self.crc)
399 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000400 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000401 s = self.cmp.compress(s)
402 self.__write(s)
403
404 def __write(self, s):
405 """Write string s to the stream if a whole new block
406 is ready to be written.
407 """
408 self.buf += s
409 while len(self.buf) > self.bufsize:
410 self.fileobj.write(self.buf[:self.bufsize])
411 self.buf = self.buf[self.bufsize:]
412
413 def close(self):
414 """Close the _Stream object. No operation should be
415 done on it afterwards.
416 """
417 if self.closed:
418 return
419
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000420 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000421 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000422
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000423 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000424 self.fileobj.write(self.buf)
425 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000426 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000427 # The native zlib crc is an unsigned 32-bit integer, but
428 # the Python wrapper implicitly casts that to a signed C
429 # long. So, on a 32-bit box self.crc may "look negative",
430 # while the same crc on a 64-bit box may "look positive".
431 # To avoid irksome warnings from the `struct` module, force
432 # it to look positive on all boxes.
433 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000434 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000435
436 if not self._extfileobj:
437 self.fileobj.close()
438
439 self.closed = True
440
441 def _init_read_gz(self):
442 """Initialize for reading a gzip compressed fileobj.
443 """
444 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
445 self.dbuf = ""
446
447 # taken from gzip.GzipFile with some alterations
448 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000449 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000450 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000451 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000452
453 flag = ord(self.__read(1))
454 self.__read(6)
455
456 if flag & 4:
457 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
458 self.read(xlen)
459 if flag & 8:
460 while True:
461 s = self.__read(1)
462 if not s or s == NUL:
463 break
464 if flag & 16:
465 while True:
466 s = self.__read(1)
467 if not s or s == NUL:
468 break
469 if flag & 2:
470 self.__read(2)
471
472 def tell(self):
473 """Return the stream's file pointer position.
474 """
475 return self.pos
476
477 def seek(self, pos=0):
478 """Set the stream's file pointer to pos. Negative seeking
479 is forbidden.
480 """
481 if pos - self.pos >= 0:
482 blocks, remainder = divmod(pos - self.pos, self.bufsize)
483 for i in xrange(blocks):
484 self.read(self.bufsize)
485 self.read(remainder)
486 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000487 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000488 return self.pos
489
490 def read(self, size=None):
491 """Return the next size number of bytes from the stream.
492 If size is not defined, return all bytes of the stream
493 up to EOF.
494 """
495 if size is None:
496 t = []
497 while True:
498 buf = self._read(self.bufsize)
499 if not buf:
500 break
501 t.append(buf)
502 buf = "".join(t)
503 else:
504 buf = self._read(size)
505 self.pos += len(buf)
506 return buf
507
508 def _read(self, size):
509 """Return size bytes from the stream.
510 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000511 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000512 return self.__read(size)
513
514 c = len(self.dbuf)
515 t = [self.dbuf]
516 while c < size:
517 buf = self.__read(self.bufsize)
518 if not buf:
519 break
520 buf = self.cmp.decompress(buf)
521 t.append(buf)
522 c += len(buf)
523 t = "".join(t)
524 self.dbuf = t[size:]
525 return t[:size]
526
527 def __read(self, size):
528 """Return size bytes from stream. If internal buffer is empty,
529 read another block from the stream.
530 """
531 c = len(self.buf)
532 t = [self.buf]
533 while c < size:
534 buf = self.fileobj.read(self.bufsize)
535 if not buf:
536 break
537 t.append(buf)
538 c += len(buf)
539 t = "".join(t)
540 self.buf = t[size:]
541 return t[:size]
542# class _Stream
543
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000544class _StreamProxy(object):
545 """Small proxy class that enables transparent compression
546 detection for the Stream interface (mode 'r|*').
547 """
548
549 def __init__(self, fileobj):
550 self.fileobj = fileobj
551 self.buf = self.fileobj.read(BLOCKSIZE)
552
553 def read(self, size):
554 self.read = self.fileobj.read
555 return self.buf
556
557 def getcomptype(self):
558 if self.buf.startswith("\037\213\010"):
559 return "gz"
560 if self.buf.startswith("BZh91"):
561 return "bz2"
562 return "tar"
563
564 def close(self):
565 self.fileobj.close()
566# class StreamProxy
567
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000568class _BZ2Proxy(object):
569 """Small proxy class that enables external file object
570 support for "r:bz2" and "w:bz2" modes. This is actually
571 a workaround for a limitation in bz2 module's BZ2File
572 class which (unlike gzip.GzipFile) has no support for
573 a file object argument.
574 """
575
576 blocksize = 16 * 1024
577
578 def __init__(self, fileobj, mode):
579 self.fileobj = fileobj
580 self.mode = mode
581 self.init()
582
583 def init(self):
584 import bz2
585 self.pos = 0
586 if self.mode == "r":
587 self.bz2obj = bz2.BZ2Decompressor()
588 self.fileobj.seek(0)
589 self.buf = ""
590 else:
591 self.bz2obj = bz2.BZ2Compressor()
592
593 def read(self, size):
594 b = [self.buf]
595 x = len(self.buf)
596 while x < size:
Lars Gustäbel76232942009-03-22 20:48:03 +0000597 raw = self.fileobj.read(self.blocksize)
598 if not raw:
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000599 break
Lars Gustäbel76232942009-03-22 20:48:03 +0000600 data = self.bz2obj.decompress(raw)
601 b.append(data)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000602 x += len(data)
603 self.buf = "".join(b)
604
605 buf = self.buf[:size]
606 self.buf = self.buf[size:]
607 self.pos += len(buf)
608 return buf
609
610 def seek(self, pos):
611 if pos < self.pos:
612 self.init()
613 self.read(pos - self.pos)
614
615 def tell(self):
616 return self.pos
617
618 def write(self, data):
619 self.pos += len(data)
620 raw = self.bz2obj.compress(data)
621 self.fileobj.write(raw)
622
623 def close(self):
624 if self.mode == "w":
625 raw = self.bz2obj.flush()
626 self.fileobj.write(raw)
Georg Brandle8953182006-05-27 14:02:03 +0000627 self.fileobj.close()
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000628# class _BZ2Proxy
629
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000630#------------------------
631# Extraction file object
632#------------------------
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000633class _FileInFile(object):
634 """A thin wrapper around an existing file object that
635 provides a part of its data as an individual file
636 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000637 """
638
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000639 def __init__(self, fileobj, offset, size, sparse=None):
640 self.fileobj = fileobj
641 self.offset = offset
642 self.size = size
643 self.sparse = sparse
644 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000645
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000646 def tell(self):
647 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000648 """
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000649 return self.position
650
651 def seek(self, position):
652 """Seek to a position in the file.
653 """
654 self.position = position
655
656 def read(self, size=None):
657 """Read data from the file.
658 """
659 if size is None:
660 size = self.size - self.position
661 else:
662 size = min(size, self.size - self.position)
663
664 if self.sparse is None:
665 return self.readnormal(size)
666 else:
667 return self.readsparse(size)
668
669 def readnormal(self, size):
670 """Read operation for regular files.
671 """
672 self.fileobj.seek(self.offset + self.position)
673 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000674 return self.fileobj.read(size)
675
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000676 def readsparse(self, size):
677 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000678 """
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000679 data = []
680 while size > 0:
681 buf = self.readsparsesection(size)
682 if not buf:
683 break
684 size -= len(buf)
685 data.append(buf)
686 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000687
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000688 def readsparsesection(self, size):
689 """Read a single section of a sparse file.
690 """
691 section = self.sparse.find(self.position)
692
693 if section is None:
694 return ""
695
696 size = min(size, section.offset + section.size - self.position)
697
698 if isinstance(section, _data):
699 realpos = section.realpos + self.position - section.offset
700 self.fileobj.seek(self.offset + realpos)
701 self.position += size
702 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000703 else:
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000704 self.position += size
705 return NUL * size
706#class _FileInFile
707
708
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       Only reading is supported. Data fetched ahead by readline()
       is kept in self.buffer; self.position is the position as seen
       by the caller and does not include that buffered data.
    """
    # number of bytes readline() fetches per step while it looks for
    # the end of the line
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Negative sizes mean "everything", as with built-in file
        # objects. Used as a slice bound or passed on as a byte count
        # they would give wrong data.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            # Serve data left over from readline() first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line; any negative value
        # (not just the default -1) and None mean "no limit".
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           whence is os.SEEK_SET, os.SEEK_CUR or os.SEEK_END; any other
           value raises ValueError. The resulting position is limited
           to the range from 0 to the size of the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered data belongs to the old position, drop it.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
836
837#------------------
838# Exported Classes
839#------------------
840class TarInfo(object):
841 """Informational class which holds the details about an
842 archive member given by a tar header block.
843 TarInfo objects are returned by TarFile.getmember(),
844 TarFile.getmembers() and TarFile.gettarinfo() and are
845 usually created internally.
846 """
847
848 def __init__(self, name=""):
849 """Construct a TarInfo object. name is the optional name
850 of the member.
851 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000852 self.name = name # member name (dirnames must end with '/')
853 self.mode = 0666 # file permissions
854 self.uid = 0 # user id
855 self.gid = 0 # group id
856 self.size = 0 # file size
857 self.mtime = 0 # modification time
858 self.chksum = 0 # header checksum
859 self.type = REGTYPE # member type
860 self.linkname = "" # link name
861 self.uname = "user" # user name
862 self.gname = "group" # group name
863 self.devmajor = 0 # device major number
864 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000865
Georg Brandl38c6a222006-05-10 16:26:03 +0000866 self.offset = 0 # the tar header starts here
867 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000868
869 def __repr__(self):
870 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
871
Guido van Rossum75b64e62005-01-16 00:16:11 +0000872 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 def frombuf(cls, buf):
874 """Construct a TarInfo object from a 512 byte string buffer.
875 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000876 if len(buf) != BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000877 raise ValueError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000878 if buf.count(NUL) == BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000879 raise ValueError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000880
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000881 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000882 tarinfo.buf = buf
Lars Gustäbel08303db2008-02-11 18:36:07 +0000883 tarinfo.name = nts(buf[0:100])
Georg Brandl38c6a222006-05-10 16:26:03 +0000884 tarinfo.mode = nti(buf[100:108])
885 tarinfo.uid = nti(buf[108:116])
886 tarinfo.gid = nti(buf[116:124])
887 tarinfo.size = nti(buf[124:136])
888 tarinfo.mtime = nti(buf[136:148])
889 tarinfo.chksum = nti(buf[148:156])
890 tarinfo.type = buf[156:157]
Lars Gustäbel08303db2008-02-11 18:36:07 +0000891 tarinfo.linkname = nts(buf[157:257])
892 tarinfo.uname = nts(buf[265:297])
893 tarinfo.gname = nts(buf[297:329])
Georg Brandl38c6a222006-05-10 16:26:03 +0000894 tarinfo.devmajor = nti(buf[329:337])
895 tarinfo.devminor = nti(buf[337:345])
Lars Gustäbel08303db2008-02-11 18:36:07 +0000896 prefix = nts(buf[345:500])
Georg Brandl2527f7f2006-10-29 09:16:15 +0000897
898 if prefix and not tarinfo.issparse():
899 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000900
Georg Brandl38c6a222006-05-10 16:26:03 +0000901 if tarinfo.chksum not in calc_chksums(buf):
Georg Brandle4751e32006-05-18 06:11:19 +0000902 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000903 return tarinfo
904
Georg Brandl38c6a222006-05-10 16:26:03 +0000905 def tobuf(self, posix=False):
Georg Brandl2527f7f2006-10-29 09:16:15 +0000906 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000907 """
Georg Brandl2527f7f2006-10-29 09:16:15 +0000908 buf = ""
909 type = self.type
910 prefix = ""
911
912 if self.name.endswith("/"):
913 type = DIRTYPE
914
Georg Brandl25f58f62006-12-06 22:21:23 +0000915 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
916 # Prevent "././@LongLink" from being normalized.
917 name = self.name
918 else:
919 name = normpath(self.name)
Georg Brandl2527f7f2006-10-29 09:16:15 +0000920
921 if type == DIRTYPE:
922 # directories should end with '/'
923 name += "/"
924
925 linkname = self.linkname
926 if linkname:
927 # if linkname is empty we end up with a '.'
928 linkname = normpath(linkname)
929
930 if posix:
931 if self.size > MAXSIZE_MEMBER:
932 raise ValueError("file is too large (>= 8 GB)")
933
934 if len(self.linkname) > LENGTH_LINK:
935 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
936
937 if len(name) > LENGTH_NAME:
938 prefix = name[:LENGTH_PREFIX + 1]
939 while prefix and prefix[-1] != "/":
940 prefix = prefix[:-1]
941
942 name = name[len(prefix):]
943 prefix = prefix[:-1]
944
945 if not prefix or len(name) > LENGTH_NAME:
946 raise ValueError("name is too long")
947
948 else:
949 if len(self.linkname) > LENGTH_LINK:
950 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
951
952 if len(name) > LENGTH_NAME:
953 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
954
Georg Brandl38c6a222006-05-10 16:26:03 +0000955 parts = [
Georg Brandl2527f7f2006-10-29 09:16:15 +0000956 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000957 itn(self.mode & 07777, 8, posix),
958 itn(self.uid, 8, posix),
959 itn(self.gid, 8, posix),
960 itn(self.size, 12, posix),
961 itn(self.mtime, 12, posix),
962 " ", # checksum field
Georg Brandl2527f7f2006-10-29 09:16:15 +0000963 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000964 stn(self.linkname, 100),
965 stn(MAGIC, 6),
966 stn(VERSION, 2),
967 stn(self.uname, 32),
968 stn(self.gname, 32),
969 itn(self.devmajor, 8, posix),
970 itn(self.devminor, 8, posix),
Georg Brandl2527f7f2006-10-29 09:16:15 +0000971 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000972 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000973
Lars Gustäbel8ff1f6a2007-04-21 12:20:09 +0000974 buf += "".join(parts).ljust(BLOCKSIZE, NUL)
Georg Brandl25f58f62006-12-06 22:21:23 +0000975 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Georg Brandl2527f7f2006-10-29 09:16:15 +0000976 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000977 self.buf = buf
978 return buf
979
Georg Brandl2527f7f2006-10-29 09:16:15 +0000980 def _create_gnulong(self, name, type):
981 """Create a GNU longname/longlink header from name.
982 It consists of an extended tar header, with the length
983 of the longname as size, followed by data blocks,
984 which contain the longname as a null terminated string.
985 """
986 name += NUL
987
988 tarinfo = self.__class__()
989 tarinfo.name = "././@LongLink"
990 tarinfo.type = type
991 tarinfo.mode = 0
992 tarinfo.size = len(name)
993
994 # create extended header
995 buf = tarinfo.tobuf()
996 # create name blocks
997 buf += name
998 blocks, remainder = divmod(len(name), BLOCKSIZE)
999 if remainder > 0:
1000 buf += (BLOCKSIZE - remainder) * NUL
1001 return buf
1002
    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member's type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member's type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member's type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member's type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member's type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member's type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member's type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1023# class TarInfo
1024
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, errors during extraction only appear
                                # in debug messages. If > 0, EnvironmentErrors
                                # are passed to the caller as exceptions; if
                                # > 1, ExtractErrors are passed on as well.

    posix = False               # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)

    fileobject = ExFileObject   # Class used by extractfile() to build the
                                # file-like object for a member.
1045
1046 def __init__(self, name=None, mode="r", fileobj=None):
1047 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1048 read from an existing archive, 'a' to append data to an existing
1049 file or 'w' to create a new file overwriting an existing one. `mode'
1050 defaults to 'r'.
1051 If `fileobj' is given, it is used for reading or writing data. If it
1052 can be determined, `mode' is overridden by `fileobj's mode.
1053 `fileobj' is not closed, when TarFile is closed.
1054 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001055 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001056 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001057 self._mode = mode
1058 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1059
1060 if not fileobj:
Lars Gustäbela9bad982007-08-28 12:33:15 +00001061 fileobj = file(name, self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001062 self._extfileobj = False
1063 else:
Lars Gustäbela9bad982007-08-28 12:33:15 +00001064 if name is None and hasattr(fileobj, "name"):
1065 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001066 if hasattr(fileobj, "mode"):
1067 self.mode = fileobj.mode
1068 self._extfileobj = True
Lars Gustäbela9bad982007-08-28 12:33:15 +00001069 self.name = os.path.abspath(name) if name else None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001070 self.fileobj = fileobj
1071
1072 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +00001073 self.closed = False
1074 self.members = [] # list of members as TarInfo objects
1075 self._loaded = False # flag if all members have been read
Lars Gustäbel7cc9c8b2007-12-01 21:06:06 +00001076 self.offset = self.fileobj.tell()
1077 # current position in the archive file
Georg Brandl38c6a222006-05-10 16:26:03 +00001078 self.inodes = {} # dictionary caching the inodes of
1079 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001080
1081 if self._mode == "r":
1082 self.firstmember = None
1083 self.firstmember = self.next()
1084
1085 if self._mode == "a":
1086 # Move to the end of the archive,
1087 # before the first empty block.
1088 self.firstmember = None
1089 while True:
1090 try:
1091 tarinfo = self.next()
1092 except ReadError:
1093 self.fileobj.seek(0)
1094 break
1095 if tarinfo is None:
1096 self.fileobj.seek(- BLOCKSIZE, 1)
1097 break
1098
1099 if self._mode in "aw":
1100 self._loaded = True
1101
1102 #--------------------------------------------------------------------------
1103 # Below are the classmethods which act as alternate constructors to the
1104 # TarFile class. The open() method is the only one that is needed for
1105 # public use; it is the "super"-constructor and is able to select an
1106 # adequate "sub"-constructor for a particular compression using the mapping
1107 # from OPEN_METH.
1108 #
1109 # This concept allows one to subclass TarFile without losing the comfort of
1110 # the super-constructor. A sub-constructor is registered and made available
1111 # by adding it to the mapping in OPEN_METH.
1112
Guido van Rossum75b64e62005-01-16 00:16:11 +00001113 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001114 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
1115 """Open a tar archive for reading, writing or appending. Return
1116 an appropriate TarFile class.
1117
1118 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001119 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001120 'r:' open for reading exclusively uncompressed
1121 'r:gz' open for reading with gzip compression
1122 'r:bz2' open for reading with bzip2 compression
1123 'a' or 'a:' open for appending
1124 'w' or 'w:' open for writing without compression
1125 'w:gz' open for writing with gzip compression
1126 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001127
1128 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001129 'r|' open an uncompressed stream of tar blocks for reading
1130 'r|gz' open a gzip compressed stream of tar blocks
1131 'r|bz2' open a bzip2 compressed stream of tar blocks
1132 'w|' open an uncompressed stream for writing
1133 'w|gz' open a gzip compressed stream for writing
1134 'w|bz2' open a bzip2 compressed stream for writing
1135 """
1136
1137 if not name and not fileobj:
Georg Brandle4751e32006-05-18 06:11:19 +00001138 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001139
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001140 if mode in ("r", "r:*"):
1141 # Find out which *open() is appropriate for opening the file.
1142 for comptype in cls.OPEN_METH:
1143 func = getattr(cls, cls.OPEN_METH[comptype])
Lars Gustäbelf9a2c632006-12-27 10:36:58 +00001144 if fileobj is not None:
1145 saved_pos = fileobj.tell()
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001146 try:
1147 return func(name, "r", fileobj)
1148 except (ReadError, CompressionError):
Lars Gustäbelf9a2c632006-12-27 10:36:58 +00001149 if fileobj is not None:
1150 fileobj.seek(saved_pos)
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001151 continue
Georg Brandle4751e32006-05-18 06:11:19 +00001152 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001153
1154 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001155 filemode, comptype = mode.split(":", 1)
1156 filemode = filemode or "r"
1157 comptype = comptype or "tar"
1158
1159 # Select the *open() function according to
1160 # given compression.
1161 if comptype in cls.OPEN_METH:
1162 func = getattr(cls, cls.OPEN_METH[comptype])
1163 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001164 raise CompressionError("unknown compression type %r" % comptype)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001165 return func(name, filemode, fileobj)
1166
1167 elif "|" in mode:
1168 filemode, comptype = mode.split("|", 1)
1169 filemode = filemode or "r"
1170 comptype = comptype or "tar"
1171
1172 if filemode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001173 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001174
1175 t = cls(name, filemode,
1176 _Stream(name, filemode, comptype, fileobj, bufsize))
1177 t._extfileobj = False
1178 return t
1179
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001180 elif mode in "aw":
1181 return cls.taropen(name, mode, fileobj)
1182
Georg Brandle4751e32006-05-18 06:11:19 +00001183 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001184
Guido van Rossum75b64e62005-01-16 00:16:11 +00001185 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001186 def taropen(cls, name, mode="r", fileobj=None):
1187 """Open uncompressed tar archive name for reading or writing.
1188 """
1189 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001190 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001191 return cls(name, mode, fileobj)
1192
Guido van Rossum75b64e62005-01-16 00:16:11 +00001193 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001194 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
1195 """Open gzip compressed tar archive name for reading or writing.
1196 Appending is not allowed.
1197 """
1198 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001199 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001200
1201 try:
1202 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001203 gzip.GzipFile
1204 except (ImportError, AttributeError):
Georg Brandle4751e32006-05-18 06:11:19 +00001205 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001206
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001207 if fileobj is None:
1208 fileobj = file(name, mode + "b")
1209
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001210 try:
Lars Gustäbel12e087a2006-12-23 18:13:57 +00001211 t = cls.taropen(name, mode,
1212 gzip.GzipFile(name, mode, compresslevel, fileobj))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001213 except IOError:
Georg Brandle4751e32006-05-18 06:11:19 +00001214 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001215 t._extfileobj = False
1216 return t
1217
Guido van Rossum75b64e62005-01-16 00:16:11 +00001218 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001219 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
1220 """Open bzip2 compressed tar archive name for reading or writing.
1221 Appending is not allowed.
1222 """
1223 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001224 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001225
1226 try:
1227 import bz2
1228 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +00001229 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001230
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001231 if fileobj is not None:
Georg Brandl49c8f4c2006-05-15 19:30:35 +00001232 fileobj = _BZ2Proxy(fileobj, mode)
1233 else:
1234 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001235
1236 try:
Lars Gustäbel12e087a2006-12-23 18:13:57 +00001237 t = cls.taropen(name, mode, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001238 except IOError:
Georg Brandle4751e32006-05-18 06:11:19 +00001239 raise ReadError("not a bzip2 file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001240 t._extfileobj = False
1241 return t
1242
    # All *open() methods are registered here. The keys are the compression
    # names accepted by open() after ':' in the mode string, the values are
    # the names of the classmethods that open() looks up with getattr().
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1249
1250 #--------------------------------------------------------------------------
1251 # The public methods which TarFile provides:
1252
1253 def close(self):
1254 """Close the TarFile. In write-mode, two finishing zero blocks are
1255 appended to the archive.
1256 """
1257 if self.closed:
1258 return
1259
1260 if self._mode in "aw":
1261 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1262 self.offset += (BLOCKSIZE * 2)
1263 # fill up the end with zero-blocks
1264 # (like option -b20 for tar does)
1265 blocks, remainder = divmod(self.offset, RECORDSIZE)
1266 if remainder > 0:
1267 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1268
1269 if not self._extfileobj:
1270 self.fileobj.close()
1271 self.closed = True
1272
1273 def getmember(self, name):
1274 """Return a TarInfo object for member `name'. If `name' can not be
1275 found in the archive, KeyError is raised. If a member occurs more
1276 than once in the archive, its last occurence is assumed to be the
1277 most up-to-date version.
1278 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001279 tarinfo = self._getmember(name)
1280 if tarinfo is None:
Georg Brandle4751e32006-05-18 06:11:19 +00001281 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001282 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001283
1284 def getmembers(self):
1285 """Return the members of the archive as a list of TarInfo objects. The
1286 list has the same order as the members in the archive.
1287 """
1288 self._check()
1289 if not self._loaded: # if we want to obtain a list of
1290 self._load() # all members, we first have to
1291 # scan the whole archive.
1292 return self.members
1293
1294 def getnames(self):
1295 """Return the members of the archive as a list of their names. It has
1296 the same order as the list returned by getmembers().
1297 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001298 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001299
1300 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1301 """Create a TarInfo object for either the file `name' or the file
1302 object `fileobj' (using os.fstat on its file descriptor). You can
1303 modify some of the TarInfo's attributes before you add it using
1304 addfile(). If given, `arcname' specifies an alternative name for the
1305 file in the archive.
1306 """
1307 self._check("aw")
1308
1309 # When fileobj is given, replace name by
1310 # fileobj's real name.
1311 if fileobj is not None:
1312 name = fileobj.name
1313
1314 # Building the name of the member in the archive.
1315 # Backward slashes are converted to forward slashes,
1316 # Absolute paths are turned to relative paths.
1317 if arcname is None:
1318 arcname = name
1319 arcname = normpath(arcname)
1320 drv, arcname = os.path.splitdrive(arcname)
1321 while arcname[0:1] == "/":
1322 arcname = arcname[1:]
1323
1324 # Now, fill the TarInfo object with
1325 # information specific for the file.
1326 tarinfo = TarInfo()
1327
1328 # Use os.stat or os.lstat, depending on platform
1329 # and if symlinks shall be resolved.
1330 if fileobj is None:
1331 if hasattr(os, "lstat") and not self.dereference:
1332 statres = os.lstat(name)
1333 else:
1334 statres = os.stat(name)
1335 else:
1336 statres = os.fstat(fileobj.fileno())
1337 linkname = ""
1338
1339 stmd = statres.st_mode
1340 if stat.S_ISREG(stmd):
1341 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001342 if not self.dereference and \
1343 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001344 # Is it a hardlink to an already
1345 # archived file?
1346 type = LNKTYPE
1347 linkname = self.inodes[inode]
1348 else:
1349 # The inode is added only if its valid.
1350 # For win32 it is always 0.
1351 type = REGTYPE
1352 if inode[0]:
1353 self.inodes[inode] = arcname
1354 elif stat.S_ISDIR(stmd):
1355 type = DIRTYPE
1356 if arcname[-1:] != "/":
1357 arcname += "/"
1358 elif stat.S_ISFIFO(stmd):
1359 type = FIFOTYPE
1360 elif stat.S_ISLNK(stmd):
1361 type = SYMTYPE
1362 linkname = os.readlink(name)
1363 elif stat.S_ISCHR(stmd):
1364 type = CHRTYPE
1365 elif stat.S_ISBLK(stmd):
1366 type = BLKTYPE
1367 else:
1368 return None
1369
1370 # Fill the TarInfo object with all
1371 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001372 tarinfo.name = arcname
1373 tarinfo.mode = stmd
1374 tarinfo.uid = statres.st_uid
1375 tarinfo.gid = statres.st_gid
1376 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001377 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001378 else:
1379 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001380 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001381 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001382 tarinfo.linkname = linkname
1383 if pwd:
1384 try:
1385 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1386 except KeyError:
1387 pass
1388 if grp:
1389 try:
1390 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1391 except KeyError:
1392 pass
1393
1394 if type in (CHRTYPE, BLKTYPE):
1395 if hasattr(os, "major") and hasattr(os, "minor"):
1396 tarinfo.devmajor = os.major(statres.st_rdev)
1397 tarinfo.devminor = os.minor(statres.st_rdev)
1398 return tarinfo
1399
1400 def list(self, verbose=True):
1401 """Print a table of contents to sys.stdout. If `verbose' is False, only
1402 the names of the members are printed. If it is True, an `ls -l'-like
1403 output is produced.
1404 """
1405 self._check()
1406
1407 for tarinfo in self:
1408 if verbose:
1409 print filemode(tarinfo.mode),
1410 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1411 tarinfo.gname or tarinfo.gid),
1412 if tarinfo.ischr() or tarinfo.isblk():
1413 print "%10s" % ("%d,%d" \
1414 % (tarinfo.devmajor, tarinfo.devminor)),
1415 else:
1416 print "%10d" % tarinfo.size,
1417 print "%d-%02d-%02d %02d:%02d:%02d" \
1418 % time.localtime(tarinfo.mtime)[:6],
1419
1420 print tarinfo.name,
1421
1422 if verbose:
1423 if tarinfo.issym():
1424 print "->", tarinfo.linkname,
1425 if tarinfo.islnk():
1426 print "link to", tarinfo.linkname,
1427 print
1428
1429 def add(self, name, arcname=None, recursive=True):
1430 """Add the file `name' to the archive. `name' may be any type of file
1431 (directory, fifo, symbolic link, etc.). If given, `arcname'
1432 specifies an alternative name for the file in the archive.
1433 Directories are added recursively by default. This can be avoided by
1434 setting `recursive' to False.
1435 """
1436 self._check("aw")
1437
1438 if arcname is None:
1439 arcname = name
1440
1441 # Skip if somebody tries to archive the archive...
Lars Gustäbel12e087a2006-12-23 18:13:57 +00001442 if self.name is not None and os.path.abspath(name) == self.name:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001443 self._dbg(2, "tarfile: Skipped %r" % name)
1444 return
1445
1446 # Special case: The user wants to add the current
1447 # working directory.
1448 if name == ".":
1449 if recursive:
1450 if arcname == ".":
1451 arcname = ""
1452 for f in os.listdir("."):
1453 self.add(f, os.path.join(arcname, f))
1454 return
1455
1456 self._dbg(1, name)
1457
1458 # Create a TarInfo object from the file.
1459 tarinfo = self.gettarinfo(name, arcname)
1460
1461 if tarinfo is None:
1462 self._dbg(1, "tarfile: Unsupported type %r" % name)
1463 return
1464
1465 # Append the tar header and data to the archive.
1466 if tarinfo.isreg():
1467 f = file(name, "rb")
1468 self.addfile(tarinfo, f)
1469 f.close()
1470
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001471 elif tarinfo.isdir():
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001472 self.addfile(tarinfo)
1473 if recursive:
1474 for f in os.listdir(name):
1475 self.add(os.path.join(name, f), os.path.join(arcname, f))
1476
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001477 else:
1478 self.addfile(tarinfo)
1479
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001480 def addfile(self, tarinfo, fileobj=None):
1481 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1482 given, tarinfo.size bytes are read from it and added to the archive.
1483 You can create TarInfo objects using gettarinfo().
1484 On Windows platforms, `fileobj' should always be opened with mode
1485 'rb' to avoid irritation about the file size.
1486 """
1487 self._check("aw")
1488
Georg Brandl2527f7f2006-10-29 09:16:15 +00001489 tarinfo = copy.copy(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001490
Georg Brandl2527f7f2006-10-29 09:16:15 +00001491 buf = tarinfo.tobuf(self.posix)
1492 self.fileobj.write(buf)
1493 self.offset += len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001494
1495 # If there's data to follow, append it.
1496 if fileobj is not None:
1497 copyfileobj(fileobj, self.fileobj, tarinfo.size)
1498 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1499 if remainder > 0:
1500 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1501 blocks += 1
1502 self.offset += blocks * BLOCKSIZE
1503
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001504 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001505
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001506 def extractall(self, path=".", members=None):
1507 """Extract all members from the archive to the current working
1508 directory and set owner, modification time and permissions on
1509 directories afterwards. `path' specifies a different directory
1510 to extract to. `members' is optional and must be a subset of the
1511 list returned by getmembers().
1512 """
1513 directories = []
1514
1515 if members is None:
1516 members = self
1517
1518 for tarinfo in members:
1519 if tarinfo.isdir():
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001520 # Extract directories with a safe mode.
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001521 directories.append(tarinfo)
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001522 tarinfo = copy.copy(tarinfo)
1523 tarinfo.mode = 0700
1524 self.extract(tarinfo, path)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001525
1526 # Reverse sort directories.
1527 directories.sort(lambda a, b: cmp(a.name, b.name))
1528 directories.reverse()
1529
1530 # Set correct owner, mtime and filemode on directories.
1531 for tarinfo in directories:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001532 dirpath = os.path.join(path, tarinfo.name)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001533 try:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001534 self.chown(tarinfo, dirpath)
1535 self.utime(tarinfo, dirpath)
1536 self.chmod(tarinfo, dirpath)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001537 except ExtractError, e:
1538 if self.errorlevel > 1:
1539 raise
1540 else:
1541 self._dbg(1, "tarfile: %s" % e)
1542
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001543 def extract(self, member, path=""):
1544 """Extract a member from the archive to the current working directory,
1545 using its full name. Its file information is extracted as accurately
1546 as possible. `member' may be a filename or a TarInfo object. You can
1547 specify a different directory using `path'.
1548 """
1549 self._check("r")
1550
1551 if isinstance(member, TarInfo):
1552 tarinfo = member
1553 else:
1554 tarinfo = self.getmember(member)
1555
Neal Norwitza4f651a2004-07-20 22:07:44 +00001556 # Prepare the link target for makelink().
1557 if tarinfo.islnk():
1558 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1559
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001560 try:
1561 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1562 except EnvironmentError, e:
1563 if self.errorlevel > 0:
1564 raise
1565 else:
1566 if e.filename is None:
1567 self._dbg(1, "tarfile: %s" % e.strerror)
1568 else:
1569 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1570 except ExtractError, e:
1571 if self.errorlevel > 1:
1572 raise
1573 else:
1574 self._dbg(1, "tarfile: %s" % e)
1575
1576 def extractfile(self, member):
1577 """Extract a member from the archive as a file object. `member' may be
1578 a filename or a TarInfo object. If `member' is a regular file, a
1579 file-like object is returned. If `member' is a link, a file-like
1580 object is constructed from the link's target. If `member' is none of
1581 the above, None is returned.
1582 The file-like object is read-only and provides the following
1583 methods: read(), readline(), readlines(), seek() and tell()
1584 """
1585 self._check("r")
1586
1587 if isinstance(member, TarInfo):
1588 tarinfo = member
1589 else:
1590 tarinfo = self.getmember(member)
1591
1592 if tarinfo.isreg():
1593 return self.fileobject(self, tarinfo)
1594
1595 elif tarinfo.type not in SUPPORTED_TYPES:
1596 # If a member's type is unknown, it is treated as a
1597 # regular file.
1598 return self.fileobject(self, tarinfo)
1599
1600 elif tarinfo.islnk() or tarinfo.issym():
1601 if isinstance(self.fileobj, _Stream):
1602 # A small but ugly workaround for the case that someone tries
1603 # to extract a (sym)link as a file-object from a non-seekable
1604 # stream of tar blocks.
Georg Brandle4751e32006-05-18 06:11:19 +00001605 raise StreamError("cannot extract (sym)link as file object")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001606 else:
Georg Brandl7eb4b7d2005-07-22 21:49:32 +00001607 # A (sym)link's file object is its target's file object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001608 return self.extractfile(self._getmember(tarinfo.linkname,
1609 tarinfo))
1610 else:
1611 # If there's no data associated with the member (directory, chrdev,
1612 # blkdev, etc.), return None instead of a file object.
1613 return None
1614
1615 def _extract_member(self, tarinfo, targetpath):
1616 """Extract the TarInfo object tarinfo to a physical
1617 file called targetpath.
1618 """
1619 # Fetch the TarInfo object for the given name
1620 # and build the destination pathname, replacing
1621 # forward slashes to platform specific separators.
1622 if targetpath[-1:] == "/":
1623 targetpath = targetpath[:-1]
1624 targetpath = os.path.normpath(targetpath)
1625
1626 # Create all upper directories.
1627 upperdirs = os.path.dirname(targetpath)
1628 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001629 # Create directories that are not part of the archive with
1630 # default permissions.
1631 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001632
1633 if tarinfo.islnk() or tarinfo.issym():
1634 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1635 else:
1636 self._dbg(1, tarinfo.name)
1637
1638 if tarinfo.isreg():
1639 self.makefile(tarinfo, targetpath)
1640 elif tarinfo.isdir():
1641 self.makedir(tarinfo, targetpath)
1642 elif tarinfo.isfifo():
1643 self.makefifo(tarinfo, targetpath)
1644 elif tarinfo.ischr() or tarinfo.isblk():
1645 self.makedev(tarinfo, targetpath)
1646 elif tarinfo.islnk() or tarinfo.issym():
1647 self.makelink(tarinfo, targetpath)
1648 elif tarinfo.type not in SUPPORTED_TYPES:
1649 self.makeunknown(tarinfo, targetpath)
1650 else:
1651 self.makefile(tarinfo, targetpath)
1652
1653 self.chown(tarinfo, targetpath)
1654 if not tarinfo.issym():
1655 self.chmod(tarinfo, targetpath)
1656 self.utime(tarinfo, targetpath)
1657
1658 #--------------------------------------------------------------------------
1659 # Below are the different file methods. They are called via
1660 # _extract_member() when extract() is called. They can be replaced in a
1661 # subclass to implement other functionality.
1662
1663 def makedir(self, tarinfo, targetpath):
1664 """Make a directory called targetpath.
1665 """
1666 try:
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001667 # Use a safe mode for the directory, the real mode is set
1668 # later in _extract_member().
1669 os.mkdir(targetpath, 0700)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001670 except EnvironmentError, e:
1671 if e.errno != errno.EEXIST:
1672 raise
1673
1674 def makefile(self, tarinfo, targetpath):
1675 """Make a file called targetpath.
1676 """
1677 source = self.extractfile(tarinfo)
1678 target = file(targetpath, "wb")
1679 copyfileobj(source, target)
1680 source.close()
1681 target.close()
1682
1683 def makeunknown(self, tarinfo, targetpath):
1684 """Make a file from a TarInfo object with an unknown type
1685 at targetpath.
1686 """
1687 self.makefile(tarinfo, targetpath)
1688 self._dbg(1, "tarfile: Unknown file type %r, " \
1689 "extracted as regular file." % tarinfo.type)
1690
1691 def makefifo(self, tarinfo, targetpath):
1692 """Make a fifo called targetpath.
1693 """
1694 if hasattr(os, "mkfifo"):
1695 os.mkfifo(targetpath)
1696 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001697 raise ExtractError("fifo not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001698
1699 def makedev(self, tarinfo, targetpath):
1700 """Make a character or block device called targetpath.
1701 """
1702 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
Georg Brandle4751e32006-05-18 06:11:19 +00001703 raise ExtractError("special devices not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001704
1705 mode = tarinfo.mode
1706 if tarinfo.isblk():
1707 mode |= stat.S_IFBLK
1708 else:
1709 mode |= stat.S_IFCHR
1710
1711 os.mknod(targetpath, mode,
1712 os.makedev(tarinfo.devmajor, tarinfo.devminor))
1713
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at targetpath.

       When the link call raises AttributeError (platform limitation),
       fall back to extracting the referenced archive member to
       targetpath, and failing that to copying the referenced file from
       the filesystem. Raises IOError if none of this succeeds.
    """
    source = tarinfo.linkname
    try:
        if not tarinfo.issym():
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(source, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # A symlink target is relative to the directory of the link.
            source = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                           source))

        try:
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            # Last resort: copy the file the link points to.
            try:
                shutil.copy2(os.path.normpath(source), targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001740
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo.

       Nothing is done unless pwd is available and the effective user
       id is 0. Group and user are looked up by name first, then by
       numeric id, and finally default to the ids of the current
       process. Raises ExtractError if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001768
def chmod(self, tarinfo, targetpath):
    """Apply tarinfo.mode as the permission bits of targetpath.

       Does nothing if the os module has no chmod(). Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001777
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

       Does nothing if the os module has no utime(), or for directories
       on win32. Raises ExtractError if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        stamp = tarinfo.mtime
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001791
1792 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001793 def next(self):
1794 """Return the next member of the archive as a TarInfo object, when
1795 TarFile is opened for reading. Return None if there is no more
1796 available.
1797 """
1798 self._check("ra")
1799 if self.firstmember is not None:
1800 m = self.firstmember
1801 self.firstmember = None
1802 return m
1803
1804 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001805 self.fileobj.seek(self.offset)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001806 while True:
1807 buf = self.fileobj.read(BLOCKSIZE)
1808 if not buf:
1809 return None
Georg Brandl38c6a222006-05-10 16:26:03 +00001810
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001811 try:
1812 tarinfo = TarInfo.frombuf(buf)
Tim Peters8a299d22006-05-19 19:16:34 +00001813
Georg Brandl38c6a222006-05-10 16:26:03 +00001814 # Set the TarInfo object's offset to the current position of the
1815 # TarFile and set self.offset to the position where the data blocks
1816 # should begin.
1817 tarinfo.offset = self.offset
1818 self.offset += BLOCKSIZE
1819
1820 tarinfo = self.proc_member(tarinfo)
1821
1822 except ValueError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001823 if self.ignore_zeros:
Georg Brandle4751e32006-05-18 06:11:19 +00001824 self._dbg(2, "0x%X: empty or invalid block: %s" %
1825 (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001826 self.offset += BLOCKSIZE
1827 continue
1828 else:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001829 if self.offset == 0:
Georg Brandle4751e32006-05-18 06:11:19 +00001830 raise ReadError("empty, unreadable or compressed "
1831 "file: %s" % e)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001832 return None
1833 break
1834
Georg Brandl38c6a222006-05-10 16:26:03 +00001835 # Some old tar programs represent a directory as a regular
1836 # file with a trailing slash.
1837 if tarinfo.isreg() and tarinfo.name.endswith("/"):
1838 tarinfo.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001839
Georg Brandl38c6a222006-05-10 16:26:03 +00001840 # Directory names should have a '/' at the end.
Lars Gustäbeld2201442007-04-20 14:49:02 +00001841 if tarinfo.isdir() and not tarinfo.name.endswith("/"):
Georg Brandl38c6a222006-05-10 16:26:03 +00001842 tarinfo.name += "/"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001843
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001844 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001845 return tarinfo
1846
1847 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001848 # The following are methods that are called depending on the type of a
1849 # member. The entry point is proc_member() which is called with a TarInfo
1850 # object created from the header block from the current offset. The
1851 # proc_member() method can be overridden in a subclass to add custom
1852 # proc_*() methods. A proc_*() method MUST implement the following
1853 # operations:
1854 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1855 # if there is data that follows.
1856 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001857 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001858 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method matching its type and
       return that method's result.

       GNU longname/longlink members go to proc_gnulong(), GNU sparse
       members to proc_sparse(), everything else to proc_builtin().
    """
    kind = tarinfo.type
    if kind == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    return self.proc_builtin(tarinfo)
1869
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type,
       which is treated like a regular file.

       Records the current offset as tarinfo.offset_data and, for
       regular or unsupported types, advances self.offset past the
       member's data blocks. Returns tarinfo.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001879
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname or longlink member.

       The data blocks of tarinfo are read and joined, then the header
       that follows is parsed and processed via proc_member(). The
       resulting TarInfo gets tarinfo's offset and, depending on
       tarinfo.type, its name or linkname replaced by the data read.
       That TarInfo is returned.
    """
    # Read tarinfo.size bytes, rounded up to whole blocks.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001908
1909 def proc_sparse(self, tarinfo):
Georg Brandl38c6a222006-05-10 16:26:03 +00001910 """Process a GNU sparse header plus extra headers.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001911 """
Georg Brandl38c6a222006-05-10 16:26:03 +00001912 buf = tarinfo.buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001913 sp = _ringbuffer()
1914 pos = 386
1915 lastpos = 0L
1916 realpos = 0L
1917 # There are 4 possible sparse structs in the
1918 # first header.
1919 for i in xrange(4):
1920 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001921 offset = nti(buf[pos:pos + 12])
1922 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001923 except ValueError:
1924 break
1925 if offset > lastpos:
1926 sp.append(_hole(lastpos, offset - lastpos))
1927 sp.append(_data(offset, numbytes, realpos))
1928 realpos += numbytes
1929 lastpos = offset + numbytes
1930 pos += 24
1931
1932 isextended = ord(buf[482])
Georg Brandl38c6a222006-05-10 16:26:03 +00001933 origsize = nti(buf[483:495])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001934
1935 # If the isextended flag is given,
1936 # there are extra headers to process.
1937 while isextended == 1:
1938 buf = self.fileobj.read(BLOCKSIZE)
1939 self.offset += BLOCKSIZE
1940 pos = 0
1941 for i in xrange(21):
1942 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001943 offset = nti(buf[pos:pos + 12])
1944 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001945 except ValueError:
1946 break
1947 if offset > lastpos:
1948 sp.append(_hole(lastpos, offset - lastpos))
1949 sp.append(_data(offset, numbytes, realpos))
1950 realpos += numbytes
1951 lastpos = offset + numbytes
1952 pos += 24
1953 isextended = ord(buf[504])
1954
1955 if lastpos < origsize:
1956 sp.append(_hole(lastpos, origsize - lastpos))
1957
1958 tarinfo.sparse = sp
1959
1960 tarinfo.offset_data = self.offset
1961 self.offset += self._block(tarinfo.size)
1962 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001963
Georg Brandl38c6a222006-05-10 16:26:03 +00001964 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001965
1966 #--------------------------------------------------------------------------
1967 # Little helper methods:
1968
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    excess = count % BLOCKSIZE
    if excess:
        count += BLOCKSIZE - excess
    return count
1977
def _getmember(self, name, tarinfo=None):
    """Search the archive members backwards for one called name.

       If tarinfo is given, only members located before it are
       considered. Returns the matching member, or None if there is
       none.
    """
    # Ensure that all members have been loaded.
    candidates = self.getmembers()

    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001993
def _load(self):
    """Call next() until it returns None, then mark the archive as
       fully loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2003
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or if, when mode is
       given, the TarFile's own mode does not occur in mode.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002012
def __iter__(self):
    """Return an iterator over the archive members.

       Once all members are loaded, this is a plain iterator over the
       members list; before that, a TarIter bound to this object.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2020
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level exceeds self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2026# class TarFile
2027
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Bind the iterator to tarfile and start at index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, raising StopIteration at the end.

           While the TarFile is not fully loaded, members come from its
           next() method; when that is exhausted the TarFile is marked
           as _loaded.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances it can
            # happen that getmembers() is called during iteration,
            # which will cause TarIter to stop prematurely. Continue
            # from the members list at our own index in that case.
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2063
2064# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole: a range described by its
       offset and size.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True if offset lies in the half-open range
        # [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
2073
class _data(_section):
    """A data section of a sparse file; realpos is stored in addition
       to the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2080
class _hole(_section):
    """A hole section of a sparse file; adds nothing to _section.
    """
2085
class _ringbuffer(list):
    """List of sections that remembers the index of the last hit, so
       that a search starts there instead of at the beginning.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item containing offset, searching circularly
           from the position of the previous hit. Return None if no item
           contains offset or the buffer is empty.
        """
        # An empty buffer cannot contain anything; without this check
        # the lookup below would raise IndexError.
        if not self:
            return None

        start = idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
            idx = (idx + 1) % len(self)
            if idx == start:
                # End of File
                return None
2106
2107#---------------------------------------------
2108# zipfile compatible TarFile class
2109#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzipped (TAR_GZIPPED)
           archive. Raises ValueError for any other compression value.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            # Give every member the attribute names used by zipfile.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive; compress_type is ignored."""
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes as a member described by zinfo, after
           copying zinfo's zipfile-style attributes to the names
           TarFile uses.
        """
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
2156#class TarFileCompat
2157
2158#--------------------
2159# exported functions
2160#--------------------
def is_tarfile(name):
    """Return True if name can be opened as a tar archive, and False
       if opening it raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2171
# Expose TarFile.open under the module-level name open. From this point on
# the name open in this module refers to TarFile.open, not the builtin.
open = TarFile.open