blob: aac6a5d48d68e44ef04ffadc3a76eed5966c0cf3 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl2527f7f2006-10-29 09:16:15 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
if sys.platform == 'mac':
    # Classic MacOS (MacOS9) is not supported.  Pathname handling in this
    # module mostly assumes that replacing "/" by the local os.path.sep is
    # enough to convert pathnames, which does not hold for the mac
    # rooted:path:name versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks = 10240 bytes)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits,
                                # i.e. 8**11 - 1 bytes)

# Values of the one-character type field of a header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits; filemode() maps them to the first character of its result.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (rendered as "t"/"T" by filemode())

# Permission bits for owner, group and others.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Return s as a fixed-width header field of length characters.

       s is cut off after length characters; a shorter s is filled up
       with NUL characters on the right.
    """
    return s[:length].ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nts(s):
    """Return the part of a header field that precedes its first null
       character, or the whole field if it contains none.
    """
    # Splitting once at the first NUL leaves the payload in front.
    return s.split("\0", 1)[0]
150
Georg Brandl38c6a222006-05-10 16:26:03 +0000151def nti(s):
152 """Convert a number field to a python number.
153 """
154 # There are two possible encodings for a number field, see
155 # itn() below.
156 if s[0] != chr(0200):
Lars Gustäbel08303db2008-02-11 18:36:07 +0000157 n = int(nts(s) or "0", 8)
Georg Brandl38c6a222006-05-10 16:26:03 +0000158 else:
159 n = 0L
160 for i in xrange(len(s) - 1):
161 n <<= 8
162 n += ord(s[i + 1])
163 return n
164
165def itn(n, digits=8, posix=False):
166 """Convert a python number to a number field.
167 """
168 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
169 # octal digits followed by a null-byte, this allows values up to
170 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
171 # that if necessary. A leading 0200 byte indicates this particular
172 # encoding, the following digits-1 bytes are a big-endian
173 # representation. This allows values up to (256**(digits-1))-1.
174 if 0 <= n < 8 ** (digits - 1):
175 s = "%0*o" % (digits - 1, n) + NUL
176 else:
177 if posix:
Georg Brandle4751e32006-05-18 06:11:19 +0000178 raise ValueError("overflow in number field")
Georg Brandl38c6a222006-05-10 16:26:03 +0000179
180 if n < 0:
181 # XXX We mimic GNU tar's behaviour with negative numbers,
182 # this could raise OverflowError.
183 n = struct.unpack("L", struct.pack("l", n))[0]
184
185 s = ""
186 for i in xrange(digits - 1):
187 s = chr(n & 0377) + s
188 n >>= 8
189 s = chr(0200) + s
190 return s
191
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the header
       block buf.

       All bytes of the block are summed up, with the 8 byte chksum field
       counted as if it held spaces.  According to the GNU tar sources,
       some tars (Sun and NeXT) sum up signed chars, which gives another
       result as soon as a byte has its high bit set, hence both flavours
       are calculated.
    """
    before = buf[:148]          # everything in front of the chksum field
    after = buf[156:512]        # everything behind it
    blanks = 256                # the chksum field itself: 8 * ord(" ")
    unsigned_chksum = blanks + sum(struct.unpack("148B", before)) \
                             + sum(struct.unpack("356B", after))
    signed_chksum = blanks + sum(struct.unpack("148b", before)) \
                           + sum(struct.unpack("356b", after))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000204
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       A length of None copies everything up to EOF.  IOError is raised
       if src ends before length bytes were copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(count):
        # Move exactly count bytes, a short read means premature EOF.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)

    done = 0
    while done < blocks:
        transfer(BUFSIZE)
        done += 1

    if remainder != 0:
        transfer(remainder)
    return
229
# One entry per character of the string built by filemode().  An entry is
# a sequence of (bits, character) pairs that are tried in order, the first
# pair whose bits are all set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # others
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        symbol = "-"            # used if none of the candidates matches
        for bit, char in candidates:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000271
# normpath() normalizes a pathname and always returns it with "/" as the
# separator, whatever the local os.sep is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
276
class TarError(Exception):
    """Base class of all exceptions defined in this module."""

class ExtractError(TarError):
    """General exception for errors during extraction."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for operations that stream-like TarFiles do not support."""
292
293#---------------------------
294# internal stream interface
295#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open(). mode is either "r"
           (read only) or "w" (write only, the file is created or
           truncated); any other value raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is only defined on platforms that distinguish
        # between text and binary files.
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # Ask for rw-rw-rw- (still subject to the umask) explicitly.
        # Without the argument os.open() uses rwxrwxrwx, so every newly
        # created archive would be marked executable.
        perm = (stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IWGRP |
                stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perm)

    def close(self):
        """Close the underlying file descriptor.
        """
        os.close(self.fd)

    def read(self, size):
        """Return at most size bytes read from the file descriptor.
        """
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string s to the file descriptor.
        """
        os.write(self.fd, s)
319
320class _Stream:
321 """Class that serves as an adapter between TarFile and
322 a stream-like object. The stream-like object only
323 needs to have a read() or write() method and is accessed
324 blockwise. Use of gzip or bzip2 compression is possible.
325 A stream-like object could be for example: sys.stdin,
326 sys.stdout, a socket, a tape device etc.
327
328 _Stream is intended to be used only internally.
329 """
330
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000331 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000332 """Construct a _Stream object.
333 """
334 self._extfileobj = True
335 if fileobj is None:
336 fileobj = _LowLevelFile(name, mode)
337 self._extfileobj = False
338
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000339 if comptype == '*':
340 # Enable transparent compression detection for the
341 # stream interface
342 fileobj = _StreamProxy(fileobj)
343 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000344
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000345 self.name = name or ""
346 self.mode = mode
347 self.comptype = comptype
348 self.fileobj = fileobj
349 self.bufsize = bufsize
350 self.buf = ""
351 self.pos = 0L
352 self.closed = False
353
354 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000355 try:
356 import zlib
357 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000358 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000359 self.zlib = zlib
360 self.crc = zlib.crc32("")
361 if mode == "r":
362 self._init_read_gz()
363 else:
364 self._init_write_gz()
365
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000366 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000367 try:
368 import bz2
369 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000370 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000371 if mode == "r":
372 self.dbuf = ""
373 self.cmp = bz2.BZ2Decompressor()
374 else:
375 self.cmp = bz2.BZ2Compressor()
376
377 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000378 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000379 self.close()
380
381 def _init_write_gz(self):
382 """Initialize for writing with gzip compression.
383 """
384 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
385 -self.zlib.MAX_WBITS,
386 self.zlib.DEF_MEM_LEVEL,
387 0)
388 timestamp = struct.pack("<L", long(time.time()))
389 self.__write("\037\213\010\010%s\002\377" % timestamp)
390 if self.name.endswith(".gz"):
391 self.name = self.name[:-3]
392 self.__write(self.name + NUL)
393
394 def write(self, s):
395 """Write string s to the stream.
396 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000397 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000398 self.crc = self.zlib.crc32(s, self.crc)
399 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000400 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000401 s = self.cmp.compress(s)
402 self.__write(s)
403
404 def __write(self, s):
405 """Write string s to the stream if a whole new block
406 is ready to be written.
407 """
408 self.buf += s
409 while len(self.buf) > self.bufsize:
410 self.fileobj.write(self.buf[:self.bufsize])
411 self.buf = self.buf[self.bufsize:]
412
413 def close(self):
414 """Close the _Stream object. No operation should be
415 done on it afterwards.
416 """
417 if self.closed:
418 return
419
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000420 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000421 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000422
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000423 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000424 self.fileobj.write(self.buf)
425 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000426 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000427 # The native zlib crc is an unsigned 32-bit integer, but
428 # the Python wrapper implicitly casts that to a signed C
429 # long. So, on a 32-bit box self.crc may "look negative",
430 # while the same crc on a 64-bit box may "look positive".
431 # To avoid irksome warnings from the `struct` module, force
432 # it to look positive on all boxes.
433 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000434 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000435
436 if not self._extfileobj:
437 self.fileobj.close()
438
439 self.closed = True
440
441 def _init_read_gz(self):
442 """Initialize for reading a gzip compressed fileobj.
443 """
444 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
445 self.dbuf = ""
446
447 # taken from gzip.GzipFile with some alterations
448 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000449 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000450 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000451 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000452
453 flag = ord(self.__read(1))
454 self.__read(6)
455
456 if flag & 4:
457 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
458 self.read(xlen)
459 if flag & 8:
460 while True:
461 s = self.__read(1)
462 if not s or s == NUL:
463 break
464 if flag & 16:
465 while True:
466 s = self.__read(1)
467 if not s or s == NUL:
468 break
469 if flag & 2:
470 self.__read(2)
471
472 def tell(self):
473 """Return the stream's file pointer position.
474 """
475 return self.pos
476
477 def seek(self, pos=0):
478 """Set the stream's file pointer to pos. Negative seeking
479 is forbidden.
480 """
481 if pos - self.pos >= 0:
482 blocks, remainder = divmod(pos - self.pos, self.bufsize)
483 for i in xrange(blocks):
484 self.read(self.bufsize)
485 self.read(remainder)
486 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000487 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000488 return self.pos
489
490 def read(self, size=None):
491 """Return the next size number of bytes from the stream.
492 If size is not defined, return all bytes of the stream
493 up to EOF.
494 """
495 if size is None:
496 t = []
497 while True:
498 buf = self._read(self.bufsize)
499 if not buf:
500 break
501 t.append(buf)
502 buf = "".join(t)
503 else:
504 buf = self._read(size)
505 self.pos += len(buf)
506 return buf
507
508 def _read(self, size):
509 """Return size bytes from the stream.
510 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000511 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000512 return self.__read(size)
513
514 c = len(self.dbuf)
515 t = [self.dbuf]
516 while c < size:
517 buf = self.__read(self.bufsize)
518 if not buf:
519 break
520 buf = self.cmp.decompress(buf)
521 t.append(buf)
522 c += len(buf)
523 t = "".join(t)
524 self.dbuf = t[size:]
525 return t[:size]
526
527 def __read(self, size):
528 """Return size bytes from stream. If internal buffer is empty,
529 read another block from the stream.
530 """
531 c = len(self.buf)
532 t = [self.buf]
533 while c < size:
534 buf = self.fileobj.read(self.bufsize)
535 if not buf:
536 break
537 t.append(buf)
538 c += len(buf)
539 t = "".join(t)
540 self.buf = t[size:]
541 return t[:size]
542# class _Stream
543
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block, which is kept in
           self.buf for the detection.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead in __init__(),
           whatever size is.
        """
        # The instance attribute shadows this method from now on, so all
        # following reads go straight to the wrapped file object.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on how the first
           block of the stream starts.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit for the compression
        # level and the block magic "1AY&SY".  The digit must not be
        # compared, otherwise only level 9 streams are recognized.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class StreamProxy
567
class _BZ2Proxy(object):
    """Small proxy class that makes the "r:bz2" and "w:bz2" modes
       work on top of an external file object.  It works around the
       fact that the bz2 module's BZ2File class (unlike
       gzip.GzipFile) cannot be given a file object.
    """

    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)start at position 0 with a fresh bz2 object.  In read
           mode the file object is rewound as well.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = ""

    def read(self, size):
        """Return up to size bytes of decompressed data.
        """
        chunks = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # The decompressor has seen the end of the bz2 stream.
                break
            chunks.append(data)
            available += len(data)

        pending = "".join(chunks)
        result, self.buf = pending[:size], pending[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to pos in the decompressed data.  Going backwards
           restarts the decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current position in the uncompressed data.
        """
        return self.pos

    def write(self, data):
        """Compress data and hand the result to the file object.
        """
        self.pos += len(data)
        self.fileobj.write(self.bz2obj.compress(data))

    def close(self):
        """Flush the compressor (in write mode) and close the file
           object.
        """
        if self.mode == "w":
            self.fileobj.write(self.bz2obj.flush())
        self.fileobj.close()
# class _BZ2Proxy
632
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000633#------------------------
634# Extraction file object
635#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       makes a part of its data available as a file object
       of its own.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj      # the file object that holds the data
        self.offset = offset        # where the part starts in fileobj
        self.size = size            # length of the part
        self.sparse = sparse        # section map of a sparse file or None
        self.position = 0           # current position inside the part

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.  At most size bytes are read and
           never more than what is left of the part.
        """
        remaining = self.size - self.position
        if size is None:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is not None:
            return self.readsparse(size)
        return self.readnormal(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        start = self.offset + self.position
        self.fileobj.seek(start)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        pieces = []
        while size > 0:
            piece = self.readsparsesection(size)
            if not piece:
                break
            pieces.append(piece)
            size -= len(piece)
        return "".join(pieces)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return ""

        # Do not read beyond the end of the section.
        size = min(size, section.offset + section.size - self.position)

        if not isinstance(section, _data):
            # Anything but a data section reads as null characters.
            self.position += size
            return NUL * size

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += size
        return self.fileobj.read(size)
#class _FileInFile
710
711
class ExFileObject(object):
    """File-like object that gives read access to the data of an
       archive member.  TarFile.extractfile() returns it.
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""        # data read ahead by readline()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Data left over from readline() goes first.
        if size is None:
            buf, self.buffer = self.buffer, ""
            buf += self.fileobj.read()
        else:
            buf = self.buffer[:size]
            self.buffer = self.buffer[size:]
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" not in self.buffer:
            # Fill the buffer until it holds a newline or EOF is hit.
            pieces = [self.buffer]
            while True:
                chunk = self.fileobj.read(self.blocksize)
                pieces.append(chunk)
                if not chunk or "\n" in chunk:
                    break
            self.buffer = "".join(pieces)

        pos = self.buffer.find("\n") + 1
        if pos == 0:
            # no newline found.
            pos = len(self.buffer)

        if size != -1:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.  The resulting position is
           kept between 0 and the size of the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                target = max(self.position + pos, 0)
            else:
                target = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            target = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.position = target
        # Data that was read ahead belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(target)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        line = self.readline()
        while line:
            yield line
            line = self.readline()
#class ExFileObject
839
840#------------------
841# Exported Classes
842#------------------
843class TarInfo(object):
844 """Informational class which holds the details about an
845 archive member given by a tar header block.
846 TarInfo objects are returned by TarFile.getmember(),
847 TarFile.getmembers() and TarFile.gettarinfo() and are
848 usually created internally.
849 """
850
851 def __init__(self, name=""):
852 """Construct a TarInfo object. name is the optional name
853 of the member.
854 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000855 self.name = name # member name (dirnames must end with '/')
856 self.mode = 0666 # file permissions
857 self.uid = 0 # user id
858 self.gid = 0 # group id
859 self.size = 0 # file size
860 self.mtime = 0 # modification time
861 self.chksum = 0 # header checksum
862 self.type = REGTYPE # member type
863 self.linkname = "" # link name
864 self.uname = "user" # user name
865 self.gname = "group" # group name
866 self.devmajor = 0 # device major number
867 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000868
Georg Brandl38c6a222006-05-10 16:26:03 +0000869 self.offset = 0 # the tar header starts here
870 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000871
872 def __repr__(self):
873 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
874
Guido van Rossum75b64e62005-01-16 00:16:11 +0000875 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000876 def frombuf(cls, buf):
877 """Construct a TarInfo object from a 512 byte string buffer.
878 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000879 if len(buf) != BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000880 raise ValueError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000881 if buf.count(NUL) == BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000882 raise ValueError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000883
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000884 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000885 tarinfo.buf = buf
Lars Gustäbel08303db2008-02-11 18:36:07 +0000886 tarinfo.name = nts(buf[0:100])
Georg Brandl38c6a222006-05-10 16:26:03 +0000887 tarinfo.mode = nti(buf[100:108])
888 tarinfo.uid = nti(buf[108:116])
889 tarinfo.gid = nti(buf[116:124])
890 tarinfo.size = nti(buf[124:136])
891 tarinfo.mtime = nti(buf[136:148])
892 tarinfo.chksum = nti(buf[148:156])
893 tarinfo.type = buf[156:157]
Lars Gustäbel08303db2008-02-11 18:36:07 +0000894 tarinfo.linkname = nts(buf[157:257])
895 tarinfo.uname = nts(buf[265:297])
896 tarinfo.gname = nts(buf[297:329])
Georg Brandl38c6a222006-05-10 16:26:03 +0000897 tarinfo.devmajor = nti(buf[329:337])
898 tarinfo.devminor = nti(buf[337:345])
Lars Gustäbel08303db2008-02-11 18:36:07 +0000899 prefix = nts(buf[345:500])
Georg Brandl2527f7f2006-10-29 09:16:15 +0000900
901 if prefix and not tarinfo.issparse():
902 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000903
Georg Brandl38c6a222006-05-10 16:26:03 +0000904 if tarinfo.chksum not in calc_chksums(buf):
Georg Brandle4751e32006-05-18 06:11:19 +0000905 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000906 return tarinfo
907
Georg Brandl38c6a222006-05-10 16:26:03 +0000908 def tobuf(self, posix=False):
Georg Brandl2527f7f2006-10-29 09:16:15 +0000909 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000910 """
Georg Brandl2527f7f2006-10-29 09:16:15 +0000911 buf = ""
912 type = self.type
913 prefix = ""
914
915 if self.name.endswith("/"):
916 type = DIRTYPE
917
Georg Brandl25f58f62006-12-06 22:21:23 +0000918 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
919 # Prevent "././@LongLink" from being normalized.
920 name = self.name
921 else:
922 name = normpath(self.name)
Georg Brandl2527f7f2006-10-29 09:16:15 +0000923
924 if type == DIRTYPE:
925 # directories should end with '/'
926 name += "/"
927
928 linkname = self.linkname
929 if linkname:
930 # if linkname is empty we end up with a '.'
931 linkname = normpath(linkname)
932
933 if posix:
934 if self.size > MAXSIZE_MEMBER:
935 raise ValueError("file is too large (>= 8 GB)")
936
937 if len(self.linkname) > LENGTH_LINK:
938 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
939
940 if len(name) > LENGTH_NAME:
941 prefix = name[:LENGTH_PREFIX + 1]
942 while prefix and prefix[-1] != "/":
943 prefix = prefix[:-1]
944
945 name = name[len(prefix):]
946 prefix = prefix[:-1]
947
948 if not prefix or len(name) > LENGTH_NAME:
949 raise ValueError("name is too long")
950
951 else:
952 if len(self.linkname) > LENGTH_LINK:
953 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
954
955 if len(name) > LENGTH_NAME:
956 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
957
Georg Brandl38c6a222006-05-10 16:26:03 +0000958 parts = [
Georg Brandl2527f7f2006-10-29 09:16:15 +0000959 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000960 itn(self.mode & 07777, 8, posix),
961 itn(self.uid, 8, posix),
962 itn(self.gid, 8, posix),
963 itn(self.size, 12, posix),
964 itn(self.mtime, 12, posix),
965 " ", # checksum field
Georg Brandl2527f7f2006-10-29 09:16:15 +0000966 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000967 stn(self.linkname, 100),
968 stn(MAGIC, 6),
969 stn(VERSION, 2),
970 stn(self.uname, 32),
971 stn(self.gname, 32),
972 itn(self.devmajor, 8, posix),
973 itn(self.devminor, 8, posix),
Georg Brandl2527f7f2006-10-29 09:16:15 +0000974 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000975 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000976
Lars Gustäbel8ff1f6a2007-04-21 12:20:09 +0000977 buf += "".join(parts).ljust(BLOCKSIZE, NUL)
Georg Brandl25f58f62006-12-06 22:21:23 +0000978 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Georg Brandl2527f7f2006-10-29 09:16:15 +0000979 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000980 self.buf = buf
981 return buf
982
Georg Brandl2527f7f2006-10-29 09:16:15 +0000983 def _create_gnulong(self, name, type):
984 """Create a GNU longname/longlink header from name.
985 It consists of an extended tar header, with the length
986 of the longname as size, followed by data blocks,
987 which contain the longname as a null terminated string.
988 """
989 name += NUL
990
991 tarinfo = self.__class__()
992 tarinfo.name = "././@LongLink"
993 tarinfo.type = type
994 tarinfo.mode = 0
995 tarinfo.size = len(name)
996
997 # create extended header
998 buf = tarinfo.tobuf()
999 # create name blocks
1000 buf += name
1001 blocks, remainder = divmod(len(name), BLOCKSIZE)
1002 if remainder > 0:
1003 buf += (BLOCKSIZE - remainder) * NUL
1004 return buf
1005
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001006 def isreg(self):
1007 return self.type in REGULAR_TYPES
1008 def isfile(self):
1009 return self.isreg()
1010 def isdir(self):
1011 return self.type == DIRTYPE
1012 def issym(self):
1013 return self.type == SYMTYPE
1014 def islnk(self):
1015 return self.type == LNKTYPE
1016 def ischr(self):
1017 return self.type == CHRTYPE
1018 def isblk(self):
1019 return self.type == BLKTYPE
1020 def isfifo(self):
1021 return self.type == FIFOTYPE
1022 def issparse(self):
1023 return self.type == GNUTYPE_SPARSE
1024 def isdev(self):
1025 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1026# class TarInfo
1027
1028class TarFile(object):
1029 """The TarFile Class provides an interface to tar archives.
1030 """
1031
1032 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1033
1034 dereference = False # If true, add content of linked file to the
1035 # tar file, else the link.
1036
1037 ignore_zeros = False # If true, skips empty or invalid blocks and
1038 # continues processing.
1039
1040 errorlevel = 0 # If 0, fatal errors only appear in debug
1041 # messages (if debug >= 0). If > 0, errors
1042 # are passed to the caller as exceptions.
1043
Martin v. Löwis75b9da42004-08-18 13:57:44 +00001044 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001045 # archives (no GNU extensions!)
1046
1047 fileobject = ExFileObject
1048
1049 def __init__(self, name=None, mode="r", fileobj=None):
1050 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1051 read from an existing archive, 'a' to append data to an existing
1052 file or 'w' to create a new file overwriting an existing one. `mode'
1053 defaults to 'r'.
1054 If `fileobj' is given, it is used for reading or writing data. If it
1055 can be determined, `mode' is overridden by `fileobj's mode.
1056 `fileobj' is not closed, when TarFile is closed.
1057 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001058 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001059 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001060 self._mode = mode
1061 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1062
1063 if not fileobj:
Lars Gustäbela9bad982007-08-28 12:33:15 +00001064 fileobj = file(name, self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001065 self._extfileobj = False
1066 else:
Lars Gustäbela9bad982007-08-28 12:33:15 +00001067 if name is None and hasattr(fileobj, "name"):
1068 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001069 if hasattr(fileobj, "mode"):
1070 self.mode = fileobj.mode
1071 self._extfileobj = True
Lars Gustäbela9bad982007-08-28 12:33:15 +00001072 self.name = os.path.abspath(name) if name else None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001073 self.fileobj = fileobj
1074
1075 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +00001076 self.closed = False
1077 self.members = [] # list of members as TarInfo objects
1078 self._loaded = False # flag if all members have been read
Lars Gustäbel7cc9c8b2007-12-01 21:06:06 +00001079 self.offset = self.fileobj.tell()
1080 # current position in the archive file
Georg Brandl38c6a222006-05-10 16:26:03 +00001081 self.inodes = {} # dictionary caching the inodes of
1082 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001083
1084 if self._mode == "r":
1085 self.firstmember = None
1086 self.firstmember = self.next()
1087
1088 if self._mode == "a":
1089 # Move to the end of the archive,
1090 # before the first empty block.
1091 self.firstmember = None
1092 while True:
1093 try:
1094 tarinfo = self.next()
1095 except ReadError:
1096 self.fileobj.seek(0)
1097 break
1098 if tarinfo is None:
1099 self.fileobj.seek(- BLOCKSIZE, 1)
1100 break
1101
1102 if self._mode in "aw":
1103 self._loaded = True
1104
1105 #--------------------------------------------------------------------------
1106 # Below are the classmethods which act as alternate constructors to the
1107 # TarFile class. The open() method is the only one that is needed for
1108 # public use; it is the "super"-constructor and is able to select an
1109 # adequate "sub"-constructor for a particular compression using the mapping
1110 # from OPEN_METH.
1111 #
1112 # This concept allows one to subclass TarFile without losing the comfort of
1113 # the super-constructor. A sub-constructor is registered and made available
1114 # by adding it to the mapping in OPEN_METH.
1115
Guido van Rossum75b64e62005-01-16 00:16:11 +00001116 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001117 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
1118 """Open a tar archive for reading, writing or appending. Return
1119 an appropriate TarFile class.
1120
1121 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001122 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001123 'r:' open for reading exclusively uncompressed
1124 'r:gz' open for reading with gzip compression
1125 'r:bz2' open for reading with bzip2 compression
1126 'a' or 'a:' open for appending
1127 'w' or 'w:' open for writing without compression
1128 'w:gz' open for writing with gzip compression
1129 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001130
1131 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001132 'r|' open an uncompressed stream of tar blocks for reading
1133 'r|gz' open a gzip compressed stream of tar blocks
1134 'r|bz2' open a bzip2 compressed stream of tar blocks
1135 'w|' open an uncompressed stream for writing
1136 'w|gz' open a gzip compressed stream for writing
1137 'w|bz2' open a bzip2 compressed stream for writing
1138 """
1139
1140 if not name and not fileobj:
Georg Brandle4751e32006-05-18 06:11:19 +00001141 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001142
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001143 if mode in ("r", "r:*"):
1144 # Find out which *open() is appropriate for opening the file.
1145 for comptype in cls.OPEN_METH:
1146 func = getattr(cls, cls.OPEN_METH[comptype])
Lars Gustäbelf9a2c632006-12-27 10:36:58 +00001147 if fileobj is not None:
1148 saved_pos = fileobj.tell()
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001149 try:
1150 return func(name, "r", fileobj)
1151 except (ReadError, CompressionError):
Lars Gustäbelf9a2c632006-12-27 10:36:58 +00001152 if fileobj is not None:
1153 fileobj.seek(saved_pos)
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001154 continue
Georg Brandle4751e32006-05-18 06:11:19 +00001155 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001156
1157 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001158 filemode, comptype = mode.split(":", 1)
1159 filemode = filemode or "r"
1160 comptype = comptype or "tar"
1161
1162 # Select the *open() function according to
1163 # given compression.
1164 if comptype in cls.OPEN_METH:
1165 func = getattr(cls, cls.OPEN_METH[comptype])
1166 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001167 raise CompressionError("unknown compression type %r" % comptype)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001168 return func(name, filemode, fileobj)
1169
1170 elif "|" in mode:
1171 filemode, comptype = mode.split("|", 1)
1172 filemode = filemode or "r"
1173 comptype = comptype or "tar"
1174
1175 if filemode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001176 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001177
1178 t = cls(name, filemode,
1179 _Stream(name, filemode, comptype, fileobj, bufsize))
1180 t._extfileobj = False
1181 return t
1182
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001183 elif mode in "aw":
1184 return cls.taropen(name, mode, fileobj)
1185
Georg Brandle4751e32006-05-18 06:11:19 +00001186 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001187
Guido van Rossum75b64e62005-01-16 00:16:11 +00001188 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001189 def taropen(cls, name, mode="r", fileobj=None):
1190 """Open uncompressed tar archive name for reading or writing.
1191 """
1192 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001193 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001194 return cls(name, mode, fileobj)
1195
Guido van Rossum75b64e62005-01-16 00:16:11 +00001196 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001197 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
1198 """Open gzip compressed tar archive name for reading or writing.
1199 Appending is not allowed.
1200 """
1201 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001202 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001203
1204 try:
1205 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001206 gzip.GzipFile
1207 except (ImportError, AttributeError):
Georg Brandle4751e32006-05-18 06:11:19 +00001208 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001209
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001210 if fileobj is None:
1211 fileobj = file(name, mode + "b")
1212
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001213 try:
Lars Gustäbel12e087a2006-12-23 18:13:57 +00001214 t = cls.taropen(name, mode,
1215 gzip.GzipFile(name, mode, compresslevel, fileobj))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001216 except IOError:
Georg Brandle4751e32006-05-18 06:11:19 +00001217 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001218 t._extfileobj = False
1219 return t
1220
Guido van Rossum75b64e62005-01-16 00:16:11 +00001221 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001222 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
1223 """Open bzip2 compressed tar archive name for reading or writing.
1224 Appending is not allowed.
1225 """
1226 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001227 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001228
1229 try:
1230 import bz2
1231 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +00001232 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001233
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001234 if fileobj is not None:
Georg Brandl49c8f4c2006-05-15 19:30:35 +00001235 fileobj = _BZ2Proxy(fileobj, mode)
1236 else:
1237 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001238
1239 try:
Lars Gustäbel12e087a2006-12-23 18:13:57 +00001240 t = cls.taropen(name, mode, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001241 except IOError:
Georg Brandle4751e32006-05-18 06:11:19 +00001242 raise ReadError("not a bzip2 file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001243 t._extfileobj = False
1244 return t
1245
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001246 # All *open() methods are registered here.
1247 OPEN_METH = {
1248 "tar": "taropen", # uncompressed tar
1249 "gz": "gzopen", # gzip compressed tar
1250 "bz2": "bz2open" # bzip2 compressed tar
1251 }
1252
1253 #--------------------------------------------------------------------------
1254 # The public methods which TarFile provides:
1255
1256 def close(self):
1257 """Close the TarFile. In write-mode, two finishing zero blocks are
1258 appended to the archive.
1259 """
1260 if self.closed:
1261 return
1262
1263 if self._mode in "aw":
1264 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1265 self.offset += (BLOCKSIZE * 2)
1266 # fill up the end with zero-blocks
1267 # (like option -b20 for tar does)
1268 blocks, remainder = divmod(self.offset, RECORDSIZE)
1269 if remainder > 0:
1270 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1271
1272 if not self._extfileobj:
1273 self.fileobj.close()
1274 self.closed = True
1275
1276 def getmember(self, name):
1277 """Return a TarInfo object for member `name'. If `name' can not be
1278 found in the archive, KeyError is raised. If a member occurs more
1279 than once in the archive, its last occurence is assumed to be the
1280 most up-to-date version.
1281 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001282 tarinfo = self._getmember(name)
1283 if tarinfo is None:
Georg Brandle4751e32006-05-18 06:11:19 +00001284 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001285 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001286
1287 def getmembers(self):
1288 """Return the members of the archive as a list of TarInfo objects. The
1289 list has the same order as the members in the archive.
1290 """
1291 self._check()
1292 if not self._loaded: # if we want to obtain a list of
1293 self._load() # all members, we first have to
1294 # scan the whole archive.
1295 return self.members
1296
1297 def getnames(self):
1298 """Return the members of the archive as a list of their names. It has
1299 the same order as the list returned by getmembers().
1300 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001301 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001302
1303 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1304 """Create a TarInfo object for either the file `name' or the file
1305 object `fileobj' (using os.fstat on its file descriptor). You can
1306 modify some of the TarInfo's attributes before you add it using
1307 addfile(). If given, `arcname' specifies an alternative name for the
1308 file in the archive.
1309 """
1310 self._check("aw")
1311
1312 # When fileobj is given, replace name by
1313 # fileobj's real name.
1314 if fileobj is not None:
1315 name = fileobj.name
1316
1317 # Building the name of the member in the archive.
1318 # Backward slashes are converted to forward slashes,
1319 # Absolute paths are turned to relative paths.
1320 if arcname is None:
1321 arcname = name
1322 arcname = normpath(arcname)
1323 drv, arcname = os.path.splitdrive(arcname)
1324 while arcname[0:1] == "/":
1325 arcname = arcname[1:]
1326
1327 # Now, fill the TarInfo object with
1328 # information specific for the file.
1329 tarinfo = TarInfo()
1330
1331 # Use os.stat or os.lstat, depending on platform
1332 # and if symlinks shall be resolved.
1333 if fileobj is None:
1334 if hasattr(os, "lstat") and not self.dereference:
1335 statres = os.lstat(name)
1336 else:
1337 statres = os.stat(name)
1338 else:
1339 statres = os.fstat(fileobj.fileno())
1340 linkname = ""
1341
1342 stmd = statres.st_mode
1343 if stat.S_ISREG(stmd):
1344 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001345 if not self.dereference and \
1346 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001347 # Is it a hardlink to an already
1348 # archived file?
1349 type = LNKTYPE
1350 linkname = self.inodes[inode]
1351 else:
1352 # The inode is added only if its valid.
1353 # For win32 it is always 0.
1354 type = REGTYPE
1355 if inode[0]:
1356 self.inodes[inode] = arcname
1357 elif stat.S_ISDIR(stmd):
1358 type = DIRTYPE
1359 if arcname[-1:] != "/":
1360 arcname += "/"
1361 elif stat.S_ISFIFO(stmd):
1362 type = FIFOTYPE
1363 elif stat.S_ISLNK(stmd):
1364 type = SYMTYPE
1365 linkname = os.readlink(name)
1366 elif stat.S_ISCHR(stmd):
1367 type = CHRTYPE
1368 elif stat.S_ISBLK(stmd):
1369 type = BLKTYPE
1370 else:
1371 return None
1372
1373 # Fill the TarInfo object with all
1374 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001375 tarinfo.name = arcname
1376 tarinfo.mode = stmd
1377 tarinfo.uid = statres.st_uid
1378 tarinfo.gid = statres.st_gid
1379 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001380 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001381 else:
1382 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001383 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001384 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001385 tarinfo.linkname = linkname
1386 if pwd:
1387 try:
1388 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1389 except KeyError:
1390 pass
1391 if grp:
1392 try:
1393 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1394 except KeyError:
1395 pass
1396
1397 if type in (CHRTYPE, BLKTYPE):
1398 if hasattr(os, "major") and hasattr(os, "minor"):
1399 tarinfo.devmajor = os.major(statres.st_rdev)
1400 tarinfo.devminor = os.minor(statres.st_rdev)
1401 return tarinfo
1402
1403 def list(self, verbose=True):
1404 """Print a table of contents to sys.stdout. If `verbose' is False, only
1405 the names of the members are printed. If it is True, an `ls -l'-like
1406 output is produced.
1407 """
1408 self._check()
1409
1410 for tarinfo in self:
1411 if verbose:
1412 print filemode(tarinfo.mode),
1413 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1414 tarinfo.gname or tarinfo.gid),
1415 if tarinfo.ischr() or tarinfo.isblk():
1416 print "%10s" % ("%d,%d" \
1417 % (tarinfo.devmajor, tarinfo.devminor)),
1418 else:
1419 print "%10d" % tarinfo.size,
1420 print "%d-%02d-%02d %02d:%02d:%02d" \
1421 % time.localtime(tarinfo.mtime)[:6],
1422
1423 print tarinfo.name,
1424
1425 if verbose:
1426 if tarinfo.issym():
1427 print "->", tarinfo.linkname,
1428 if tarinfo.islnk():
1429 print "link to", tarinfo.linkname,
1430 print
1431
1432 def add(self, name, arcname=None, recursive=True):
1433 """Add the file `name' to the archive. `name' may be any type of file
1434 (directory, fifo, symbolic link, etc.). If given, `arcname'
1435 specifies an alternative name for the file in the archive.
1436 Directories are added recursively by default. This can be avoided by
1437 setting `recursive' to False.
1438 """
1439 self._check("aw")
1440
1441 if arcname is None:
1442 arcname = name
1443
1444 # Skip if somebody tries to archive the archive...
Lars Gustäbel12e087a2006-12-23 18:13:57 +00001445 if self.name is not None and os.path.abspath(name) == self.name:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001446 self._dbg(2, "tarfile: Skipped %r" % name)
1447 return
1448
1449 # Special case: The user wants to add the current
1450 # working directory.
1451 if name == ".":
1452 if recursive:
1453 if arcname == ".":
1454 arcname = ""
1455 for f in os.listdir("."):
1456 self.add(f, os.path.join(arcname, f))
1457 return
1458
1459 self._dbg(1, name)
1460
1461 # Create a TarInfo object from the file.
1462 tarinfo = self.gettarinfo(name, arcname)
1463
1464 if tarinfo is None:
1465 self._dbg(1, "tarfile: Unsupported type %r" % name)
1466 return
1467
1468 # Append the tar header and data to the archive.
1469 if tarinfo.isreg():
1470 f = file(name, "rb")
1471 self.addfile(tarinfo, f)
1472 f.close()
1473
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001474 elif tarinfo.isdir():
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001475 self.addfile(tarinfo)
1476 if recursive:
1477 for f in os.listdir(name):
1478 self.add(os.path.join(name, f), os.path.join(arcname, f))
1479
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001480 else:
1481 self.addfile(tarinfo)
1482
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001483 def addfile(self, tarinfo, fileobj=None):
1484 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1485 given, tarinfo.size bytes are read from it and added to the archive.
1486 You can create TarInfo objects using gettarinfo().
1487 On Windows platforms, `fileobj' should always be opened with mode
1488 'rb' to avoid irritation about the file size.
1489 """
1490 self._check("aw")
1491
Georg Brandl2527f7f2006-10-29 09:16:15 +00001492 tarinfo = copy.copy(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493
Georg Brandl2527f7f2006-10-29 09:16:15 +00001494 buf = tarinfo.tobuf(self.posix)
1495 self.fileobj.write(buf)
1496 self.offset += len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497
1498 # If there's data to follow, append it.
1499 if fileobj is not None:
1500 copyfileobj(fileobj, self.fileobj, tarinfo.size)
1501 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1502 if remainder > 0:
1503 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1504 blocks += 1
1505 self.offset += blocks * BLOCKSIZE
1506
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001507 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001508
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001509 def extractall(self, path=".", members=None):
1510 """Extract all members from the archive to the current working
1511 directory and set owner, modification time and permissions on
1512 directories afterwards. `path' specifies a different directory
1513 to extract to. `members' is optional and must be a subset of the
1514 list returned by getmembers().
1515 """
1516 directories = []
1517
1518 if members is None:
1519 members = self
1520
1521 for tarinfo in members:
1522 if tarinfo.isdir():
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001523 # Extract directories with a safe mode.
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001524 directories.append(tarinfo)
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001525 tarinfo = copy.copy(tarinfo)
1526 tarinfo.mode = 0700
1527 self.extract(tarinfo, path)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001528
1529 # Reverse sort directories.
1530 directories.sort(lambda a, b: cmp(a.name, b.name))
1531 directories.reverse()
1532
1533 # Set correct owner, mtime and filemode on directories.
1534 for tarinfo in directories:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001535 dirpath = os.path.join(path, tarinfo.name)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001536 try:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001537 self.chown(tarinfo, dirpath)
1538 self.utime(tarinfo, dirpath)
1539 self.chmod(tarinfo, dirpath)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001540 except ExtractError, e:
1541 if self.errorlevel > 1:
1542 raise
1543 else:
1544 self._dbg(1, "tarfile: %s" % e)
1545
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001546 def extract(self, member, path=""):
1547 """Extract a member from the archive to the current working directory,
1548 using its full name. Its file information is extracted as accurately
1549 as possible. `member' may be a filename or a TarInfo object. You can
1550 specify a different directory using `path'.
1551 """
1552 self._check("r")
1553
1554 if isinstance(member, TarInfo):
1555 tarinfo = member
1556 else:
1557 tarinfo = self.getmember(member)
1558
Neal Norwitza4f651a2004-07-20 22:07:44 +00001559 # Prepare the link target for makelink().
1560 if tarinfo.islnk():
1561 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1562
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001563 try:
1564 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1565 except EnvironmentError, e:
1566 if self.errorlevel > 0:
1567 raise
1568 else:
1569 if e.filename is None:
1570 self._dbg(1, "tarfile: %s" % e.strerror)
1571 else:
1572 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1573 except ExtractError, e:
1574 if self.errorlevel > 1:
1575 raise
1576 else:
1577 self._dbg(1, "tarfile: %s" % e)
1578
1579 def extractfile(self, member):
1580 """Extract a member from the archive as a file object. `member' may be
1581 a filename or a TarInfo object. If `member' is a regular file, a
1582 file-like object is returned. If `member' is a link, a file-like
1583 object is constructed from the link's target. If `member' is none of
1584 the above, None is returned.
1585 The file-like object is read-only and provides the following
1586 methods: read(), readline(), readlines(), seek() and tell()
1587 """
1588 self._check("r")
1589
1590 if isinstance(member, TarInfo):
1591 tarinfo = member
1592 else:
1593 tarinfo = self.getmember(member)
1594
1595 if tarinfo.isreg():
1596 return self.fileobject(self, tarinfo)
1597
1598 elif tarinfo.type not in SUPPORTED_TYPES:
1599 # If a member's type is unknown, it is treated as a
1600 # regular file.
1601 return self.fileobject(self, tarinfo)
1602
1603 elif tarinfo.islnk() or tarinfo.issym():
1604 if isinstance(self.fileobj, _Stream):
1605 # A small but ugly workaround for the case that someone tries
1606 # to extract a (sym)link as a file-object from a non-seekable
1607 # stream of tar blocks.
Georg Brandle4751e32006-05-18 06:11:19 +00001608 raise StreamError("cannot extract (sym)link as file object")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001609 else:
Georg Brandl7eb4b7d2005-07-22 21:49:32 +00001610 # A (sym)link's file object is its target's file object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001611 return self.extractfile(self._getmember(tarinfo.linkname,
1612 tarinfo))
1613 else:
1614 # If there's no data associated with the member (directory, chrdev,
1615 # blkdev, etc.), return None instead of a file object.
1616 return None
1617
1618 def _extract_member(self, tarinfo, targetpath):
1619 """Extract the TarInfo object tarinfo to a physical
1620 file called targetpath.
1621 """
1622 # Fetch the TarInfo object for the given name
1623 # and build the destination pathname, replacing
1624 # forward slashes to platform specific separators.
1625 if targetpath[-1:] == "/":
1626 targetpath = targetpath[:-1]
1627 targetpath = os.path.normpath(targetpath)
1628
1629 # Create all upper directories.
1630 upperdirs = os.path.dirname(targetpath)
1631 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001632 # Create directories that are not part of the archive with
1633 # default permissions.
1634 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001635
1636 if tarinfo.islnk() or tarinfo.issym():
1637 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1638 else:
1639 self._dbg(1, tarinfo.name)
1640
1641 if tarinfo.isreg():
1642 self.makefile(tarinfo, targetpath)
1643 elif tarinfo.isdir():
1644 self.makedir(tarinfo, targetpath)
1645 elif tarinfo.isfifo():
1646 self.makefifo(tarinfo, targetpath)
1647 elif tarinfo.ischr() or tarinfo.isblk():
1648 self.makedev(tarinfo, targetpath)
1649 elif tarinfo.islnk() or tarinfo.issym():
1650 self.makelink(tarinfo, targetpath)
1651 elif tarinfo.type not in SUPPORTED_TYPES:
1652 self.makeunknown(tarinfo, targetpath)
1653 else:
1654 self.makefile(tarinfo, targetpath)
1655
1656 self.chown(tarinfo, targetpath)
1657 if not tarinfo.issym():
1658 self.chmod(tarinfo, targetpath)
1659 self.utime(tarinfo, targetpath)
1660
1661 #--------------------------------------------------------------------------
1662 # Below are the different file methods. They are called via
1663 # _extract_member() when extract() is called. They can be replaced in a
1664 # subclass to implement other functionality.
1665
1666 def makedir(self, tarinfo, targetpath):
1667 """Make a directory called targetpath.
1668 """
1669 try:
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001670 # Use a safe mode for the directory, the real mode is set
1671 # later in _extract_member().
1672 os.mkdir(targetpath, 0700)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001673 except EnvironmentError, e:
1674 if e.errno != errno.EEXIST:
1675 raise
1676
1677 def makefile(self, tarinfo, targetpath):
1678 """Make a file called targetpath.
1679 """
1680 source = self.extractfile(tarinfo)
1681 target = file(targetpath, "wb")
1682 copyfileobj(source, target)
1683 source.close()
1684 target.close()
1685
1686 def makeunknown(self, tarinfo, targetpath):
1687 """Make a file from a TarInfo object with an unknown type
1688 at targetpath.
1689 """
1690 self.makefile(tarinfo, targetpath)
1691 self._dbg(1, "tarfile: Unknown file type %r, " \
1692 "extracted as regular file." % tarinfo.type)
1693
1694 def makefifo(self, tarinfo, targetpath):
1695 """Make a fifo called targetpath.
1696 """
1697 if hasattr(os, "mkfifo"):
1698 os.mkfifo(targetpath)
1699 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001700 raise ExtractError("fifo not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001701
1702 def makedev(self, tarinfo, targetpath):
1703 """Make a character or block device called targetpath.
1704 """
1705 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
Georg Brandle4751e32006-05-18 06:11:19 +00001706 raise ExtractError("special devices not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001707
1708 mode = tarinfo.mode
1709 if tarinfo.isblk():
1710 mode |= stat.S_IFBLK
1711 else:
1712 mode |= stat.S_IFCHR
1713
1714 os.mknod(targetpath, mode,
1715 os.makedev(tarinfo.devmajor, tarinfo.devminor))
1716
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at targetpath.

    A symbolic link points to tarinfo.linkname; a hard link is made to
    tarinfo._link_target. If that attempt raises AttributeError (the
    link cannot be created on this platform), the referenced member is
    extracted to targetpath instead, and if that fails too the
    referenced file is copied from the filesystem.

    Raises IOError if none of these attempts succeeds.
    """
    linkname = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkname, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        # Fall through to the copy-based emulation below.
        pass

    if tarinfo.issym():
        # A symlink target is relative to the directory of the link.
        linkname = normpath(
            os.path.join(os.path.dirname(tarinfo.name), linkname))

    try:
        self._extract_member(self.getmember(linkname), targetpath)
    except (EnvironmentError, KeyError):
        try:
            shutil.copy2(os.path.normpath(linkname), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001743
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo.

    Nothing is done unless the pwd module is available and the
    effective user id is 0. The group is looked up by tarinfo.gname,
    then by tarinfo.gid, and finally falls back to os.getgid(); the
    user is resolved the same way from tarinfo.uname, tarinfo.uid and
    os.getuid().

    Raises ExtractError if changing the owner fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not the file it points to.
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001771
def chmod(self, tarinfo, targetpath):
    """Set the permission bits of targetpath to tarinfo.mode.

    Does nothing if the os module has no chmod(). Raises ExtractError
    if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001780
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

    Does nothing if the os module has no utime(), or for directories
    on win32. Raises ExtractError if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return

    mtime = tarinfo.mtime
    try:
        os.utime(targetpath, (mtime, mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001794
1795 #--------------------------------------------------------------------------
def next(self):
    """Return the next archive member as a TarInfo object.

    The TarFile must be open in mode "r" or "a" (checked through
    _check()). A member stored in self.firstmember is handed out first.
    Otherwise header blocks are read starting at self.offset and the
    resulting member is appended to self.members.

    Returns None when no further member is available. Raises ReadError
    if a header cannot be parsed while self.offset is 0 and
    ignore_zeros is not set.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        block = self.fileobj.read(BLOCKSIZE)
        if not block:
            return None

        try:
            tarinfo = TarInfo.frombuf(block)

            # Record where this header starts, then advance self.offset
            # to the position where the data blocks should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError:
            # Fetched through sys.exc_info() so that the clause needs
            # no version-specific syntax to bind the exception.
            error = sys.exc_info()[1]
            if not self.ignore_zeros:
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % error)
                return None
            self._dbg(2, "0x%X: empty or invalid block: %s" %
                      (self.offset, error))
            self.offset += BLOCKSIZE
            continue
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have a '/' at the end.
    if tarinfo.isdir() and not tarinfo.name.endswith("/"):
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1849
1850 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001851 # The following are methods that are called depending on the type of a
1852 # member. The entry point is proc_member() which is called with a TarInfo
1853 # object created from the header block from the current offset. The
1854 # proc_member() method can be overridden in a subclass to add custom
1855 # proc_*() methods. A proc_*() method MUST implement the following
1856 # operations:
1857 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1858 # if there is data that follows.
1859 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001860 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001861 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method matching its type.

    GNU longname and longlink members go to proc_gnulong(), GNU sparse
    members to proc_sparse(), and everything else to proc_builtin().
    The result of the selected method is returned.
    """
    member_type = tarinfo.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif member_type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1872
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type
    which is treated like a regular file.

    Sets tarinfo.offset_data to the current offset and, when data
    blocks follow the header, moves self.offset past them. Returns
    tarinfo.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001882
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname or longlink member.

    The data blocks following the header are read and joined, then the
    next header is parsed and processed with proc_member(). The
    resulting TarInfo gets the offset of the long-name header and has
    its name (longname) or linkname (longlink) replaced by the string
    read from the data blocks. That TarInfo is returned.
    """
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longdata = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(longdata)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(longdata)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001911
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus its extra headers.

    Builds a _ringbuffer of _data and _hole sections from the sparse
    structs (pairs of 12-byte offset and size fields) and stores it in
    tarinfo.sparse. tarinfo.offset_data and self.offset are updated
    from the stored size, after which tarinfo.size is replaced by the
    original file size read from the header. Returns tarinfo.
    """
    header = tarinfo.buf
    sections = _ringbuffer()
    lastpos = 0
    realpos = 0

    # There are 4 possible sparse structs in the
    # first header, starting at byte 386.
    cursor = 386
    for _ in xrange(4):
        try:
            offset = nti(header[cursor:cursor + 12])
            numbytes = nti(header[cursor + 12:cursor + 24])
        except ValueError:
            break
        if offset > lastpos:
            sections.append(_hole(lastpos, offset - lastpos))
        sections.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        cursor += 24

    isextended = ord(header[482])
    origsize = nti(header[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        header = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        cursor = 0
        for _ in xrange(21):
            try:
                offset = nti(header[cursor:cursor + 12])
                numbytes = nti(header[cursor + 12:cursor + 24])
            except ValueError:
                break
            if offset > lastpos:
                sections.append(_hole(lastpos, offset - lastpos))
            sections.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            cursor += 24
        isextended = ord(header[504])

    # Pad with a trailing hole up to the original size.
    if lastpos < origsize:
        sections.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sections

    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001968
1969 #--------------------------------------------------------------------------
1970 # Little helper methods:
1971
def _block(self, count):
    """Round count up to the next multiple of BLOCKSIZE and return
    it, e.g. _block(834) => 1024.
    """
    remainder = count % BLOCKSIZE
    if not remainder:
        return count
    return count - remainder + BLOCKSIZE
1980
1981 def _getmember(self, name, tarinfo=None):
1982 """Find an archive member by name from bottom to top.
1983 If tarinfo is given, it is used as the starting point.
1984 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001985 # Ensure that all members have been loaded.
1986 members = self.getmembers()
1987
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001988 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001989 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001990 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001991 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001992
1993 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001994 if name == members[i].name:
1995 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001996
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001997 def _load(self):
1998 """Read through the entire archive file and look for readable
1999 members.
2000 """
2001 while True:
2002 tarinfo = self.next()
2003 if tarinfo is None:
2004 break
2005 self._loaded = True
2006
2007 def _check(self, mode=None):
2008 """Check if TarFile is still open, and if the operation's mode
2009 corresponds to TarFile's mode.
2010 """
2011 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002012 raise IOError("%s is closed" % self.__class__.__name__)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002013 if mode is not None and self._mode not in mode:
Georg Brandle4751e32006-05-18 06:11:19 +00002014 raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002015
2016 def __iter__(self):
2017 """Provide an iterator object.
2018 """
2019 if self._loaded:
2020 return iter(self.members)
2021 else:
2022 return TarIter(self)
2023
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002024 def _dbg(self, level, msg):
2025 """Write debugging output to sys.stderr.
2026 """
2027 if level <= self.debug:
2028 print >> sys.stderr, msg
2029# class TarFile
2030
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile, starting at index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member of the TarFile.

        While the TarFile is not fully loaded, members come from its
        next() method; when that is exhausted the TarFile is marked as
        _loaded. Afterwards members are taken from its members list by
        index. Raises StopIteration when no member is left.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2066
2067# Helper classes for sparse file support
2068class _section:
2069 """Base class for _data and _hole.
2070 """
2071 def __init__(self, offset, size):
2072 self.offset = offset
2073 self.size = size
2074 def __contains__(self, offset):
2075 return self.offset <= offset < self.offset + self.size
2076
class _data(_section):
    """A data section of a sparse file; realpos is stored next to the
       section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2083
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section
       and exists only to tell holes apart from data sections.
    """
2088
2089class _ringbuffer(list):
2090 """Ringbuffer class which increases performance
2091 over a regular list.
2092 """
2093 def __init__(self):
2094 self.idx = 0
2095 def find(self, offset):
2096 idx = self.idx
2097 while True:
2098 item = self[idx]
2099 if offset in item:
2100 break
2101 idx += 1
2102 if idx == len(self):
2103 idx = 0
2104 if idx == self.idx:
2105 # End of File
2106 return None
2107 self.idx = idx
2108 return item
2109
2110#---------------------------------------------
2111# zipfile compatible TarFile class
2112#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() for TAR_PLAIN or with
        TarFile.gzopen() for TAR_GZIPPED; any other compression value
        raises ValueError. In a read mode every member is given the
        attributes filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing through the TarFile's list() method."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname; compress_type is
        ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, described by zinfo.

        zinfo's name, size and mtime are set from its filename,
        file_size and date_time attributes before it is added.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2159#class TarFileCompat
2160
2161#--------------------
2162# exported functions
2163#--------------------
def is_tarfile(name):
    """Return True if name can be opened as a tar archive, False if
       opening or closing it raises TarError.

       The name open used here is the module-level one, which is
       bound to TarFile.open.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2174
# Module-level alias, so that open() in this module is TarFile.open().
# This rebinds the name open for the whole module; code in this file
# that needs the builtin must use another name for it.
open = TarFile.open