blob: 5ad096dbf36beb559b2383f2cf7d3547562579b2 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl2527f7f2006-10-29 09:16:15 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    # Refuse to import rather than silently mangle member names.
    raise ImportError("tarfile does not work for platform==mac")
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = "\0"                              # the null character
BLOCKSIZE = 512                         # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20             # length of records (20 blocks)
MAGIC = "ustar"                         # magic tar string
VERSION = "00"                          # version number

LENGTH_NAME = 100                       # maximum length of a filename
LENGTH_LINK = 100                       # maximum length of a linkname
LENGTH_PREFIX = 155                     # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L          # maximum size of a file (11 octal
                                        # digits, i.e. 8 ** 11 - 1 bytes)

# Values of the one-character type field of a header.
REGTYPE = "0"                           # regular file
AREGTYPE = "\0"                         # regular file
LNKTYPE = "1"                           # link (inside tarfile)
SYMTYPE = "2"                           # symbolic link
CHRTYPE = "3"                           # character special device
BLKTYPE = "4"                           # block special device
DIRTYPE = "5"                           # directory
FIFOTYPE = "6"                          # fifo special device
CONTTYPE = "7"                          # contiguous file

GNUTYPE_LONGNAME = "L"                  # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"                  # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                    # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits (first column of the string built by filemode()).
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (rendered as "t"/"T" by filemode())

# Permission bits, one read/write/execute triple per class.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Return s as a field of exactly length characters: longer
       input is truncated, shorter input is padded with NULs.
    """
    # A negative repeat count yields "", so over-long input gets no padding.
    padding = NUL * (length - len(s))
    return s[:length] + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nts(s):
    """Convert a null-terminated string field to a python string.
    """
    # Everything from the first null char onwards is padding.
    end = s.find("\0")
    if end < 0:
        return s
    return s[:end]

def nti(s):
    """Convert a number field to a python number.

       s is the raw header field. Two encodings are understood, see
       itn(): a string of octal digits terminated by a null char or
       a space, and the GNU base-256 form introduced by a 0200 byte.
       A field that holds no digits at all (only nulls and/or blanks)
       is read as 0.

       Raises ValueError if an octal field contains anything other
       than octal digits and surrounding whitespace.
    """
    if s[0] == "\200":
        # GNU encoding: the bytes after the marker are a big-endian number.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # Some tar implementations fill unused number fields with blanks
    # instead of zeros; strip them so that such a field decodes to 0
    # rather than making int() fail.
    return int(nts(s).strip() or "0", 8)
164
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field of digits characters.

       With posix set, a value that does not fit the octal encoding
       raises ValueError instead of falling back to the GNU encoding.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Peel off the low byte first, then flip to big-endian order.
    payload = []
    for _ in range(digits - 1):
        payload.append(chr(n & 0xff))
        n >>= 8
    payload.reverse()
    return "\200" + "".join(payload)
191
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the header
       block at the start of buf.

       All bytes of the 512 byte header are summed up, except for the
       8 byte chksum field at offset 148, which is counted as if it was
       filled with spaces (8 * 32 == 256). According to the GNU tar
       sources, some tars (Sun and NeXT) calculate chksum with signed
       char, which gives a different result if there are bytes with the
       high bit set, hence the second value.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000204
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src is exhausted before length bytes
       have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes; a short read means src ended early.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)
    while full_chunks > 0:
        transfer(chunk_size)
        full_chunks -= 1

    if tail != 0:
        transfer(tail)
    return
229
# One entry per character of the string built by filemode(). Each entry
# lists (bits, char) candidates in priority order: the first candidate
# whose bits are all set in the mode supplies the character.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TSUID | TUEXEC, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TSGID | TGEXEC, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TSVTX | TOEXEC, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000256
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # Candidates are ordered by priority; only the first hit counts.
        hits = [char for bit, char in candidates if mode & bit == bit]
        if hits:
            chars.append(hits[0])
        else:
            chars.append("-")
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000271
# Member names inside an archive always use "/" as separator, whatever
# the local os.sep is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
276
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
292
293#---------------------------
294# internal stream interface
295#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name. mode is "r" for reading or "w" for
           writing (the file is created or truncated); any other
           value raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # Create new archives as rw-rw-rw- (subject to the umask).
        # Without an explicit permission argument os.open() uses 0777,
        # which would mark every new archive as executable.
        perm = (stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IWGRP |
                stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perm)

    def close(self):
        """Close the underlying file descriptor.
        """
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the file descriptor.
        """
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string s to the file descriptor.
        """
        os.write(self.fd, s)
319
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file to open if fileobj is None; also written
                       into the gzip header when compressing
           mode     -- "r" or "w"
           comptype -- "tar" (no compression), "gz", "bz2", or "*" to
                       detect the type from the first block read
           fileobj  -- stream-like object to use, or None to open name
           bufsize  -- size of the blocks exchanged with fileobj

           Raises CompressionError if the module needed for comptype
           is not available. A file opened here is closed again if
           the setup fails.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("")
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Do not leak a file that was opened here, and keep __del__
            # from trying to flush a half-initialized stream.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Mark the stream as closed up front, so that a failing write
        # below does not make __del__ run the whole sequence again.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = ""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))
        finally:
            # A file opened by the constructor is always released,
            # even if flushing the remaining data failed.
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Raises ReadError if the gzip magic is missing and
           CompressionError for a method other than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        # Skip mtime (4 bytes), extra flags and OS.
        self.__read(6)

        if flag & 4:
            # FEXTRA: a 2 byte little-endian length followed by that many
            # bytes. They belong to the raw header, so they must be skipped
            # with __read(); read() would feed them to the decompressor
            # and count them as archive data.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: null-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: null-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 2 byte header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        distance = pos - self.pos
        if distance < 0:
            raise StreamError("seeking backwards is not allowed")

        # Seeking forward is done by reading and discarding data.
        blocks, remainder = divmod(distance, self.bufsize)
        while blocks > 0:
            self.read(self.bufsize)
            blocks -= 1
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # dbuf holds decompressed data left over from the previous call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
543
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block, which is kept
           for getcomptype() and handed out by the first read().
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block read by the constructor, whatever size is.
           Later calls go directly to the wrapped object's read().
        """
        # Replace this method on the instance, it is needed only once.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar", depending on the magic bytes
           at the start of the stream.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", the block size digit
        # ("1" to "9") and the magic of the first block. Checking for
        # "BZh91" only would miss everything not written at level 9.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class StreamProxy
567
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing.
        """
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)start at the beginning of the uncompressed data.
           In read mode fileobj is rewound to offset 0.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of uncompressed data; less is
           returned only if the end of the data has been reached.
        """
        chunks = [self.buf]
        have = len(self.buf)
        while have < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # fileobj is exhausted. Without this check a truncated
                # stream would loop forever, because decompressing ""
                # neither yields data nor raises EOFError.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # The end of the bz2 stream had already been reached.
                break
            chunks.append(data)
            have += len(data)
        self.buf = "".join(chunks)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to offset pos of the uncompressed data. Going
           backwards restarts decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current offset in the uncompressed data.
        """
        return self.pos

    def write(self, data):
        """Compress data and write the result to fileobj.
        """
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush pending compressed data in write mode, then
           close fileobj.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
630
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000631#------------------------
632# Extraction file object
633#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj is the wrapped file object, offset the position
           in fileobj where the data starts and size its length.
           sparse, if not None, is the object that is asked (via
           its find() method) for the section at a given position.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file. At most size bytes are returned;
           if size is None or negative, everything up to the end of
           the file is returned.
        """
        # Never ask for a negative amount: fileobj.read() would take
        # that as "read everything" and return data that lies beyond
        # this file, and position would be moved backwards.
        remaining = max(self.size - self.position, 0)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = []
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)

        if section is None:
            return ""

        # Do not read beyond the end of the current section.
        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            # Any other kind of section is returned as NULs.
            self.position += size
            return NUL * size
#class _FileInFile
708
709
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Set up read access to the data of the member tarinfo
           inside the archive tarfile.
        """
        sparse = getattr(tarinfo, "sparse", None)
        self.fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                                   tarinfo.size, sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data already fetched by readline() but not yet handed out.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Buffered data goes first, the rest comes from the file.
        if size is None:
            data = self.buffer + self.fileobj.read()
            self.buffer = ""
        else:
            data = self.buffer[:size]
            self.buffer = self.buffer[size:]
            data += self.fileobj.read(size - len(data))

        self.position += len(data)
        return data

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        newline = self.buffer.find("\n")
        if newline >= 0:
            pos = newline + 1
        else:
            # Keep reading blocks until one contains a newline or
            # the file is exhausted.
            pieces = [self.buffer]
            while True:
                chunk = self.fileobj.read(self.blocksize)
                pieces.append(chunk)
                if not chunk or "\n" in chunk:
                    break
            self.buffer = "".join(pieces)
            pos = self.buffer.find("\n") + 1
            if pos == 0:
                # no newline found.
                pos = len(self.buffer)

        if size != -1:
            pos = min(size, pos)

        line = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(line)
        return line

    def readlines(self):
        """Return a list with all remaining lines.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position
           is kept within the file's bounds.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            target = self.position + pos
            if pos < 0:
                target = max(target, 0)
            else:
                target = min(target, self.size)
        elif whence == os.SEEK_END:
            target = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.position = target
        # Buffered data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(target)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        line = self.readline()
        while line:
            yield line
            line = self.readline()
#class ExFileObject
837
838#------------------
839# Exported Classes
840#------------------
841class TarInfo(object):
842 """Informational class which holds the details about an
843 archive member given by a tar header block.
844 TarInfo objects are returned by TarFile.getmember(),
845 TarFile.getmembers() and TarFile.gettarinfo() and are
846 usually created internally.
847 """
848
849 def __init__(self, name=""):
850 """Construct a TarInfo object. name is the optional name
851 of the member.
852 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000853 self.name = name # member name (dirnames must end with '/')
854 self.mode = 0666 # file permissions
855 self.uid = 0 # user id
856 self.gid = 0 # group id
857 self.size = 0 # file size
858 self.mtime = 0 # modification time
859 self.chksum = 0 # header checksum
860 self.type = REGTYPE # member type
861 self.linkname = "" # link name
862 self.uname = "user" # user name
863 self.gname = "group" # group name
864 self.devmajor = 0 # device major number
865 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000866
Georg Brandl38c6a222006-05-10 16:26:03 +0000867 self.offset = 0 # the tar header starts here
868 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000869
870 def __repr__(self):
871 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
872
Guido van Rossum75b64e62005-01-16 00:16:11 +0000873 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000874 def frombuf(cls, buf):
875 """Construct a TarInfo object from a 512 byte string buffer.
876 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000877 if len(buf) != BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000878 raise ValueError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000879 if buf.count(NUL) == BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000880 raise ValueError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000881
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000882 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000883 tarinfo.buf = buf
Lars Gustäbel08303db2008-02-11 18:36:07 +0000884 tarinfo.name = nts(buf[0:100])
Georg Brandl38c6a222006-05-10 16:26:03 +0000885 tarinfo.mode = nti(buf[100:108])
886 tarinfo.uid = nti(buf[108:116])
887 tarinfo.gid = nti(buf[116:124])
888 tarinfo.size = nti(buf[124:136])
889 tarinfo.mtime = nti(buf[136:148])
890 tarinfo.chksum = nti(buf[148:156])
891 tarinfo.type = buf[156:157]
Lars Gustäbel08303db2008-02-11 18:36:07 +0000892 tarinfo.linkname = nts(buf[157:257])
893 tarinfo.uname = nts(buf[265:297])
894 tarinfo.gname = nts(buf[297:329])
Georg Brandl38c6a222006-05-10 16:26:03 +0000895 tarinfo.devmajor = nti(buf[329:337])
896 tarinfo.devminor = nti(buf[337:345])
Lars Gustäbel08303db2008-02-11 18:36:07 +0000897 prefix = nts(buf[345:500])
Georg Brandl2527f7f2006-10-29 09:16:15 +0000898
899 if prefix and not tarinfo.issparse():
900 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000901
Georg Brandl38c6a222006-05-10 16:26:03 +0000902 if tarinfo.chksum not in calc_chksums(buf):
Georg Brandle4751e32006-05-18 06:11:19 +0000903 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000904 return tarinfo
905
Georg Brandl38c6a222006-05-10 16:26:03 +0000906 def tobuf(self, posix=False):
Georg Brandl2527f7f2006-10-29 09:16:15 +0000907 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000908 """
Georg Brandl2527f7f2006-10-29 09:16:15 +0000909 buf = ""
910 type = self.type
911 prefix = ""
912
913 if self.name.endswith("/"):
914 type = DIRTYPE
915
Georg Brandl25f58f62006-12-06 22:21:23 +0000916 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
917 # Prevent "././@LongLink" from being normalized.
918 name = self.name
919 else:
920 name = normpath(self.name)
Georg Brandl2527f7f2006-10-29 09:16:15 +0000921
922 if type == DIRTYPE:
923 # directories should end with '/'
924 name += "/"
925
926 linkname = self.linkname
927 if linkname:
928 # if linkname is empty we end up with a '.'
929 linkname = normpath(linkname)
930
931 if posix:
932 if self.size > MAXSIZE_MEMBER:
933 raise ValueError("file is too large (>= 8 GB)")
934
935 if len(self.linkname) > LENGTH_LINK:
936 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
937
938 if len(name) > LENGTH_NAME:
939 prefix = name[:LENGTH_PREFIX + 1]
940 while prefix and prefix[-1] != "/":
941 prefix = prefix[:-1]
942
943 name = name[len(prefix):]
944 prefix = prefix[:-1]
945
946 if not prefix or len(name) > LENGTH_NAME:
947 raise ValueError("name is too long")
948
949 else:
950 if len(self.linkname) > LENGTH_LINK:
951 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
952
953 if len(name) > LENGTH_NAME:
954 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
955
Georg Brandl38c6a222006-05-10 16:26:03 +0000956 parts = [
Georg Brandl2527f7f2006-10-29 09:16:15 +0000957 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000958 itn(self.mode & 07777, 8, posix),
959 itn(self.uid, 8, posix),
960 itn(self.gid, 8, posix),
961 itn(self.size, 12, posix),
962 itn(self.mtime, 12, posix),
963 " ", # checksum field
Georg Brandl2527f7f2006-10-29 09:16:15 +0000964 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000965 stn(self.linkname, 100),
966 stn(MAGIC, 6),
967 stn(VERSION, 2),
968 stn(self.uname, 32),
969 stn(self.gname, 32),
970 itn(self.devmajor, 8, posix),
971 itn(self.devminor, 8, posix),
Georg Brandl2527f7f2006-10-29 09:16:15 +0000972 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000973 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000974
Lars Gustäbel8ff1f6a2007-04-21 12:20:09 +0000975 buf += "".join(parts).ljust(BLOCKSIZE, NUL)
Georg Brandl25f58f62006-12-06 22:21:23 +0000976 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Georg Brandl2527f7f2006-10-29 09:16:15 +0000977 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000978 self.buf = buf
979 return buf
980
def _create_gnulong(self, name, type):
    """Build the GNU longname/longlink blocks for `name'.

    The result is an extra tar header of the given `type' whose size
    field holds the length of the NUL-terminated name, followed by the
    name itself padded with NULs to a multiple of BLOCKSIZE.
    """
    payload = name + NUL

    header = self.__class__()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # Extended header first, then the name data.
    result = header.tobuf() + payload

    # Number of NULs needed to reach the next block boundary
    # (zero if the payload already ends on one).
    padding = -len(payload) % BLOCKSIZE
    if padding:
        result += padding * NUL
    return result
1003
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES

def isfile(self):
    """Return the result of isreg()."""
    return self.isreg()

def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    return self.type == DIRTYPE

def issym(self):
    """Return True if the member's type is SYMTYPE."""
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE."""
    return self.type == GNUTYPE_SPARSE

def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1024# class TarInfo
1025
1026class TarFile(object):
1027 """The TarFile Class provides an interface to tar archives.
1028 """
1029
1030 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1031
1032 dereference = False # If true, add content of linked file to the
1033 # tar file, else the link.
1034
1035 ignore_zeros = False # If true, skips empty or invalid blocks and
1036 # continues processing.
1037
1038 errorlevel = 0 # If 0, fatal errors only appear in debug
1039 # messages (if debug >= 0). If > 0, errors
1040 # are passed to the caller as exceptions.
1041
Martin v. Löwis75b9da42004-08-18 13:57:44 +00001042 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001043 # archives (no GNU extensions!)
1044
1045 fileobject = ExFileObject
1046
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
    read from an existing archive, 'a' to append data to an existing
    file or 'w' to create a new file overwriting an existing one. `mode'
    defaults to 'r'.
    If `fileobj' is given, it is used for reading or writing data. If it
    can be determined, `mode' is overridden by `fileobj's mode.
    `fileobj' is not closed, when TarFile is closed.

    Raises ValueError for any other `mode'. If positioning in the
    archive fails, a file that was opened here is closed again before
    the exception is passed on.
    """
    # Exact membership test: a substring test would let "" through and
    # fail below with a KeyError instead of the documented ValueError.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(name, self.mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init datastructures
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    initialized = False
    try:
        # current position in the archive file
        self.offset = self.fileobj.tell()

        if self._mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self._mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                try:
                    tarinfo = self.next()
                except ReadError:
                    self.fileobj.seek(0)
                    break
                if tarinfo is None:
                    self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self._mode in "aw":
            self._loaded = True

        initialized = True
    finally:
        if not initialized:
            # Do not leak a file handle that was opened above; a file
            # object supplied by the caller is left untouched.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
1102
1103 #--------------------------------------------------------------------------
1104 # Below are the classmethods which act as alternate constructors to the
1105 # TarFile class. The open() method is the only one that is needed for
1106 # public use; it is the "super"-constructor and is able to select an
1107 # adequate "sub"-constructor for a particular compression using the mapping
1108 # from OPEN_METH.
1109 #
1110 # This concept allows one to subclass TarFile without losing the comfort of
1111 # the super-constructor. A sub-constructor is registered and made available
1112 # by adding it to the mapping in OPEN_METH.
1113
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
    an appropriate TarFile class.

    mode:
    'r' or 'r:*' open for reading with transparent compression
    'r:'         open for reading exclusively uncompressed
    'r:gz'       open for reading with gzip compression
    'r:bz2'      open for reading with bzip2 compression
    'a' or 'a:'  open for appending
    'w' or 'w:'  open for writing without compression
    'w:gz'       open for writing with gzip compression
    'w:bz2'      open for writing with bzip2 compression

    'r|*'        open a stream of tar blocks with transparent compression
    'r|'         open an uncompressed stream of tar blocks for reading
    'r|gz'       open a gzip compressed stream of tar blocks
    'r|bz2'      open a bzip2 compressed stream of tar blocks
    'w|'         open an uncompressed stream for writing
    'w|gz'       open a gzip compressed stream for writing
    'w|bz2'      open a bzip2 compressed stream for writing

    Raises ValueError if neither `name' nor `fileobj' is given or if
    `mode' cannot be interpreted, CompressionError for an unknown
    compression type and ReadError if no registered opener accepts the
    file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                # Rewind so that the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    # Exact membership test: a substring test against "aw" would also
    # accept "" and "aw" and hand them on to taropen().
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001185
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

    `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
    raised. Returns cls(name, mode, fileobj).
    """
    # Exact membership test; a substring test against "raw" would also
    # accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1193
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
    Appending is not allowed.

    Raises ValueError for a mode other than 'r' or 'w',
    CompressionError if the gzip module cannot be used and ReadError
    if an IOError occurs while opening the archive. A file that was
    opened here is closed again if opening the archive fails.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    succeeded = False
    try:
        try:
            t = cls.taropen(name, mode,
                            gzip.GzipFile(name, mode, compresslevel, fileobj))
        except IOError:
            raise ReadError("not a gzip file")
        succeeded = True
    finally:
        # Only release what was opened above; a caller-supplied file
        # object stays open (open() rewinds and reuses it).
        if opened_here and not succeeded:
            fileobj.close()
    t._extfileobj = False
    return t
1218
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
    Appending is not allowed.

    Raises ValueError for a mode other than 'r' or 'w',
    CompressionError if the bz2 module cannot be imported and ReadError
    if an IOError occurs while opening the archive. A BZ2File that was
    opened here is closed again if opening the archive fails.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    succeeded = False
    try:
        try:
            t = cls.taropen(name, mode, fileobj)
        except IOError:
            raise ReadError("not a bzip2 file")
        succeeded = True
    finally:
        # Only release the BZ2File created above; a proxy around a
        # caller-supplied file object is left alone.
        if opened_here and not succeeded:
            fileobj.close()
    t._extfileobj = False
    return t
1243
# Registry of the *open() methods: maps a compression type name to the
# name of the classmethod that opens archives of that type.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1250
1251 #--------------------------------------------------------------------------
1252 # The public methods which TarFile provides:
1253
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
    appended to the archive.

    Calling close() on an already closed TarFile does nothing. The
    underlying file is closed (unless it was supplied by the caller)
    and the TarFile is marked closed even if writing the trailer fails;
    the exception is passed on in that case.
    """
    if self.closed:
        return

    try:
        if self._mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file handle even when the trailer could not be
        # written (e.g. the disk is full).
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1273
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
    found in the archive, KeyError is raised. If a member occurs more
    than once in the archive, its last occurrence is assumed to be the
    most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001284
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
    list has the same order as the members in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been
    # scanned once.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1294
def getnames(self):
    """Return the names of the archive members as a list, in the same
    order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001300
1301 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1302 """Create a TarInfo object for either the file `name' or the file
1303 object `fileobj' (using os.fstat on its file descriptor). You can
1304 modify some of the TarInfo's attributes before you add it using
1305 addfile(). If given, `arcname' specifies an alternative name for the
1306 file in the archive.
1307 """
1308 self._check("aw")
1309
1310 # When fileobj is given, replace name by
1311 # fileobj's real name.
1312 if fileobj is not None:
1313 name = fileobj.name
1314
1315 # Building the name of the member in the archive.
1316 # Backward slashes are converted to forward slashes,
1317 # Absolute paths are turned to relative paths.
1318 if arcname is None:
1319 arcname = name
1320 arcname = normpath(arcname)
1321 drv, arcname = os.path.splitdrive(arcname)
1322 while arcname[0:1] == "/":
1323 arcname = arcname[1:]
1324
1325 # Now, fill the TarInfo object with
1326 # information specific for the file.
1327 tarinfo = TarInfo()
1328
1329 # Use os.stat or os.lstat, depending on platform
1330 # and if symlinks shall be resolved.
1331 if fileobj is None:
1332 if hasattr(os, "lstat") and not self.dereference:
1333 statres = os.lstat(name)
1334 else:
1335 statres = os.stat(name)
1336 else:
1337 statres = os.fstat(fileobj.fileno())
1338 linkname = ""
1339
1340 stmd = statres.st_mode
1341 if stat.S_ISREG(stmd):
1342 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001343 if not self.dereference and \
1344 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001345 # Is it a hardlink to an already
1346 # archived file?
1347 type = LNKTYPE
1348 linkname = self.inodes[inode]
1349 else:
1350 # The inode is added only if its valid.
1351 # For win32 it is always 0.
1352 type = REGTYPE
1353 if inode[0]:
1354 self.inodes[inode] = arcname
1355 elif stat.S_ISDIR(stmd):
1356 type = DIRTYPE
1357 if arcname[-1:] != "/":
1358 arcname += "/"
1359 elif stat.S_ISFIFO(stmd):
1360 type = FIFOTYPE
1361 elif stat.S_ISLNK(stmd):
1362 type = SYMTYPE
1363 linkname = os.readlink(name)
1364 elif stat.S_ISCHR(stmd):
1365 type = CHRTYPE
1366 elif stat.S_ISBLK(stmd):
1367 type = BLKTYPE
1368 else:
1369 return None
1370
1371 # Fill the TarInfo object with all
1372 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001373 tarinfo.name = arcname
1374 tarinfo.mode = stmd
1375 tarinfo.uid = statres.st_uid
1376 tarinfo.gid = statres.st_gid
1377 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001378 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001379 else:
1380 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001381 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001382 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001383 tarinfo.linkname = linkname
1384 if pwd:
1385 try:
1386 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1387 except KeyError:
1388 pass
1389 if grp:
1390 try:
1391 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1392 except KeyError:
1393 pass
1394
1395 if type in (CHRTYPE, BLKTYPE):
1396 if hasattr(os, "major") and hasattr(os, "minor"):
1397 tarinfo.devmajor = os.major(statres.st_rdev)
1398 tarinfo.devminor = os.minor(statres.st_rdev)
1399 return tarinfo
1400
1401 def list(self, verbose=True):
1402 """Print a table of contents to sys.stdout. If `verbose' is False, only
1403 the names of the members are printed. If it is True, an `ls -l'-like
1404 output is produced.
1405 """
1406 self._check()
1407
1408 for tarinfo in self:
1409 if verbose:
1410 print filemode(tarinfo.mode),
1411 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1412 tarinfo.gname or tarinfo.gid),
1413 if tarinfo.ischr() or tarinfo.isblk():
1414 print "%10s" % ("%d,%d" \
1415 % (tarinfo.devmajor, tarinfo.devminor)),
1416 else:
1417 print "%10d" % tarinfo.size,
1418 print "%d-%02d-%02d %02d:%02d:%02d" \
1419 % time.localtime(tarinfo.mtime)[:6],
1420
1421 print tarinfo.name,
1422
1423 if verbose:
1424 if tarinfo.issym():
1425 print "->", tarinfo.linkname,
1426 if tarinfo.islnk():
1427 print "link to", tarinfo.linkname,
1428 print
1429
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
    (directory, fifo, symbolic link, etc.). If given, `arcname'
    specifies an alternative name for the file in the archive.
    Directories are added recursively by default. This can be avoided by
    setting `recursive' to False.

    The archive file itself and files of a type that gettarinfo() does
    not handle are skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
            return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Do not leak the handle if writing to the archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1480
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
    given, tarinfo.size bytes are read from it and added to the archive.
    You can create TarInfo objects using gettarinfo().
    On Windows platforms, `fileobj' should always be opened with mode
    'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object is not the one that
    # ends up in self.members.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last block
    # with NULs.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        padding = -member.size % BLOCKSIZE
        if padding:
            self.fileobj.write(NUL * padding)
        self.offset += member.size + padding

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001506
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001507 def extractall(self, path=".", members=None):
1508 """Extract all members from the archive to the current working
1509 directory and set owner, modification time and permissions on
1510 directories afterwards. `path' specifies a different directory
1511 to extract to. `members' is optional and must be a subset of the
1512 list returned by getmembers().
1513 """
1514 directories = []
1515
1516 if members is None:
1517 members = self
1518
1519 for tarinfo in members:
1520 if tarinfo.isdir():
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001521 # Extract directories with a safe mode.
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001522 directories.append(tarinfo)
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001523 tarinfo = copy.copy(tarinfo)
1524 tarinfo.mode = 0700
1525 self.extract(tarinfo, path)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001526
1527 # Reverse sort directories.
1528 directories.sort(lambda a, b: cmp(a.name, b.name))
1529 directories.reverse()
1530
1531 # Set correct owner, mtime and filemode on directories.
1532 for tarinfo in directories:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001533 dirpath = os.path.join(path, tarinfo.name)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001534 try:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001535 self.chown(tarinfo, dirpath)
1536 self.utime(tarinfo, dirpath)
1537 self.chmod(tarinfo, dirpath)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001538 except ExtractError, e:
1539 if self.errorlevel > 1:
1540 raise
1541 else:
1542 self._dbg(1, "tarfile: %s" % e)
1543
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001544 def extract(self, member, path=""):
1545 """Extract a member from the archive to the current working directory,
1546 using its full name. Its file information is extracted as accurately
1547 as possible. `member' may be a filename or a TarInfo object. You can
1548 specify a different directory using `path'.
1549 """
1550 self._check("r")
1551
1552 if isinstance(member, TarInfo):
1553 tarinfo = member
1554 else:
1555 tarinfo = self.getmember(member)
1556
Neal Norwitza4f651a2004-07-20 22:07:44 +00001557 # Prepare the link target for makelink().
1558 if tarinfo.islnk():
1559 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1560
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001561 try:
1562 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1563 except EnvironmentError, e:
1564 if self.errorlevel > 0:
1565 raise
1566 else:
1567 if e.filename is None:
1568 self._dbg(1, "tarfile: %s" % e.strerror)
1569 else:
1570 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1571 except ExtractError, e:
1572 if self.errorlevel > 1:
1573 raise
1574 else:
1575 self._dbg(1, "tarfile: %s" % e)
1576
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
    a filename or a TarInfo object. If `member' is a regular file, a
    file-like object is returned. If `member' is a link, a file-like
    object is constructed from the link's target. If `member' is none of
    the above, None is returned.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member if isinstance(member, TarInfo) \
                     else self.getmember(member)

    # Regular files, and members of an unknown type (which are treated
    # as regular files), get a file object of their own.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
1615
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
    file called targetpath.
    """
    # Build the destination pathname, replacing forward slashes
    # with platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories. Directories that are not part of
    # the archive get default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    is_link = tarinfo.islnk() or tarinfo.issym()
    if is_link:
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member's type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    # Apply owner, permissions and modification time; permissions and
    # mtime are not applied to symbolic links.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1658
1659 #--------------------------------------------------------------------------
1660 # Below are the different file methods. They are called via
1661 # _extract_member() when extract() is called. They can be replaced in a
1662 # subclass to implement other functionality.
1663
1664 def makedir(self, tarinfo, targetpath):
1665 """Make a directory called targetpath.
1666 """
1667 try:
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001668 # Use a safe mode for the directory, the real mode is set
1669 # later in _extract_member().
1670 os.mkdir(targetpath, 0700)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001671 except EnvironmentError, e:
1672 if e.errno != errno.EEXIST:
1673 raise
1674
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
    member `tarinfo'.

    Both the member's file object and the target file are closed again
    even if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1683
def makeunknown(self, tarinfo, targetpath):
    """Make a file at targetpath from a TarInfo object whose type is
    unknown. The member is extracted like a regular file and a debug
    message is emitted.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1691
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. Raises ExtractError if the os
    module does not provide mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001699
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath. Raises
    ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode with the matching file type bits.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    mode = tarinfo.mode | devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1714
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
    (platform limitation), we try to make a copy of the referenced file
    instead of a link.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        # Fall through to the copy fallback below.
        pass

    if tarinfo.issym():
        # Resolve the link target relative to the directory of the
        # link itself.
        base = os.path.dirname(tarinfo.name)
        linkpath = normpath(os.path.join(base, linkpath))

    try:
        # First try to extract the referenced member from the archive.
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        # Otherwise copy the referenced file from the file system.
        linkpath = os.path.normpath(linkpath)
        try:
            shutil.copy2(linkpath, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001741
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo.
       Nothing is done unless pwd is available and the effective
       uid is 0. Names are looked up first, then the numeric ids;
       if both lookups fail the current process's ids are used.
       Raise ExtractError if the ownership change fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001769
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Do nothing if the os module has no chmod(); raise
       ExtractError if the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001778
def utime(self, tarinfo, targetpath):
    """Set both the access and modification time of targetpath to
       tarinfo.mtime. Raise ExtractError if the call fails.
    """
    # Skipped when os.utime() is missing. Also skipped for
    # directories on win32: according to msdn.microsoft.com, it is
    # an error (EACCES) to use utime() on directories.
    if not hasattr(os, 'utime') or \
       (sys.platform == "win32" and tarinfo.isdir()):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001792
1793 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001794 def next(self):
1795 """Return the next member of the archive as a TarInfo object, when
1796 TarFile is opened for reading. Return None if there is no more
1797 available.
1798 """
1799 self._check("ra")
1800 if self.firstmember is not None:
1801 m = self.firstmember
1802 self.firstmember = None
1803 return m
1804
1805 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001806 self.fileobj.seek(self.offset)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001807 while True:
1808 buf = self.fileobj.read(BLOCKSIZE)
1809 if not buf:
1810 return None
Georg Brandl38c6a222006-05-10 16:26:03 +00001811
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001812 try:
1813 tarinfo = TarInfo.frombuf(buf)
Tim Peters8a299d22006-05-19 19:16:34 +00001814
Georg Brandl38c6a222006-05-10 16:26:03 +00001815 # Set the TarInfo object's offset to the current position of the
1816 # TarFile and set self.offset to the position where the data blocks
1817 # should begin.
1818 tarinfo.offset = self.offset
1819 self.offset += BLOCKSIZE
1820
1821 tarinfo = self.proc_member(tarinfo)
1822
1823 except ValueError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001824 if self.ignore_zeros:
Georg Brandle4751e32006-05-18 06:11:19 +00001825 self._dbg(2, "0x%X: empty or invalid block: %s" %
1826 (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001827 self.offset += BLOCKSIZE
1828 continue
1829 else:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001830 if self.offset == 0:
Georg Brandle4751e32006-05-18 06:11:19 +00001831 raise ReadError("empty, unreadable or compressed "
1832 "file: %s" % e)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001833 return None
1834 break
1835
Georg Brandl38c6a222006-05-10 16:26:03 +00001836 # Some old tar programs represent a directory as a regular
1837 # file with a trailing slash.
1838 if tarinfo.isreg() and tarinfo.name.endswith("/"):
1839 tarinfo.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001840
Georg Brandl38c6a222006-05-10 16:26:03 +00001841 # Directory names should have a '/' at the end.
Lars Gustäbeld2201442007-04-20 14:49:02 +00001842 if tarinfo.isdir() and not tarinfo.name.endswith("/"):
Georg Brandl38c6a222006-05-10 16:26:03 +00001843 tarinfo.name += "/"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001844
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001845 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001846 return tarinfo
1847
1848 #--------------------------------------------------------------------------
# The following methods are called depending on the type of a member.
# The entry point is proc_member(), which is called with a TarInfo
# object created from the header block at the current offset. The
# proc_member() method can be overridden in a subclass to add custom
# proc_*() methods. A proc_*() method MUST implement the following
# operations:
# 1. Set tarinfo.offset_data to the position where the data blocks
#    begin, if there is data that follows.
# 2. Set self.offset to the position where the next member's header
#    will begin.
# 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that matches its
       type and return that method's result.
    """
    member_type = tarinfo.type
    if member_type == GNUTYPE_LONGNAME or member_type == GNUTYPE_LONGLINK:
        handler = self.proc_gnulong
    elif member_type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1870
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type
       (which is treated like a regular file). Return tarinfo.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001880
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member: read its data
       blocks, process the header that follows, and store the long
       value as that member's name or linkname. Return the TarInfo
       object of the following member.
    """
    # Collect the blocks that hold the long value.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longvalue = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(longvalue)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(longvalue)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001909
1910 def proc_sparse(self, tarinfo):
Georg Brandl38c6a222006-05-10 16:26:03 +00001911 """Process a GNU sparse header plus extra headers.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001912 """
Georg Brandl38c6a222006-05-10 16:26:03 +00001913 buf = tarinfo.buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001914 sp = _ringbuffer()
1915 pos = 386
1916 lastpos = 0L
1917 realpos = 0L
1918 # There are 4 possible sparse structs in the
1919 # first header.
1920 for i in xrange(4):
1921 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001922 offset = nti(buf[pos:pos + 12])
1923 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001924 except ValueError:
1925 break
1926 if offset > lastpos:
1927 sp.append(_hole(lastpos, offset - lastpos))
1928 sp.append(_data(offset, numbytes, realpos))
1929 realpos += numbytes
1930 lastpos = offset + numbytes
1931 pos += 24
1932
1933 isextended = ord(buf[482])
Georg Brandl38c6a222006-05-10 16:26:03 +00001934 origsize = nti(buf[483:495])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001935
1936 # If the isextended flag is given,
1937 # there are extra headers to process.
1938 while isextended == 1:
1939 buf = self.fileobj.read(BLOCKSIZE)
1940 self.offset += BLOCKSIZE
1941 pos = 0
1942 for i in xrange(21):
1943 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001944 offset = nti(buf[pos:pos + 12])
1945 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001946 except ValueError:
1947 break
1948 if offset > lastpos:
1949 sp.append(_hole(lastpos, offset - lastpos))
1950 sp.append(_data(offset, numbytes, realpos))
1951 realpos += numbytes
1952 lastpos = offset + numbytes
1953 pos += 24
1954 isextended = ord(buf[504])
1955
1956 if lastpos < origsize:
1957 sp.append(_hole(lastpos, origsize - lastpos))
1958
1959 tarinfo.sparse = sp
1960
1961 tarinfo.offset_data = self.offset
1962 self.offset += self._block(tarinfo.size)
1963 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001964
Georg Brandl38c6a222006-05-10 16:26:03 +00001965 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001966
1967 #--------------------------------------------------------------------------
1968 # Little helper methods:
1969
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    whole, rest = divmod(count, BLOCKSIZE)
    if rest:
        # A partially filled block still occupies a full one.
        return (whole + 1) * BLOCKSIZE
    return whole * BLOCKSIZE
1978
1979 def _getmember(self, name, tarinfo=None):
1980 """Find an archive member by name from bottom to top.
1981 If tarinfo is given, it is used as the starting point.
1982 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001983 # Ensure that all members have been loaded.
1984 members = self.getmembers()
1985
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001986 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001987 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001988 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001989 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001990
1991 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001992 if name == members[i].name:
1993 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001994
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001995 def _load(self):
1996 """Read through the entire archive file and look for readable
1997 members.
1998 """
1999 while True:
2000 tarinfo = self.next()
2001 if tarinfo is None:
2002 break
2003 self._loaded = True
2004
2005 def _check(self, mode=None):
2006 """Check if TarFile is still open, and if the operation's mode
2007 corresponds to TarFile's mode.
2008 """
2009 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002010 raise IOError("%s is closed" % self.__class__.__name__)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002011 if mode is not None and self._mode not in mode:
Georg Brandle4751e32006-05-18 06:11:19 +00002012 raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002013
2014 def __iter__(self):
2015 """Provide an iterator object.
2016 """
2017 if self._loaded:
2018 return iter(self.members)
2019 else:
2020 return TarIter(self)
2021
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level exceeds self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2027# class TarFile
2028
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator positioned at the first member of
           tarfile.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, obtained from TarFile's next()
           method. When that returns nothing more, flag the TarFile
           as _loaded and stop.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances it can
            # happen that getmembers() is called during iteration,
            # which will cause TarIter to stop prematurely. Once the
            # archive is loaded, continue from the member list.
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2064
2065# Helper classes for sparse file support
2066class _section:
2067 """Base class for _data and _hole.
2068 """
2069 def __init__(self, offset, size):
2070 self.offset = offset
2071 self.size = size
2072 def __contains__(self, offset):
2073 return self.offset <= offset < self.offset + self.size
2074
class _data(_section):
    """A section of a sparse file that holds actual data; realpos
       is stored alongside the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2081
class _hole(_section):
    """A section of a sparse file that is a hole; it adds nothing
       to _section.
    """
2086
2087class _ringbuffer(list):
2088 """Ringbuffer class which increases performance
2089 over a regular list.
2090 """
2091 def __init__(self):
2092 self.idx = 0
2093 def find(self, offset):
2094 idx = self.idx
2095 while True:
2096 item = self[idx]
2097 if offset in item:
2098 break
2099 idx += 1
2100 if idx == len(self):
2101 idx = 0
2102 if idx == self.idx:
2103 # End of File
2104 return None
2105 self.idx = idx
2106 return item
2107
2108#---------------------------------------------
2109# zipfile compatible TarFile class
2110#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names ZipInfo uses.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [info.name for info in self.infolist()]

    def infolist(self):
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        # Translate the ZipInfo-style attributes into TarInfo ones.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        self.tarfile.close()
#class TarFileCompat
2158
2159#--------------------
2160# exported functions
2161#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) as a
       tar archive by this module's open(), and False if doing so
       raises TarError.
    """
    try:
        archive = open(name)
        archive.close()
    except TarError:
        return False
    else:
        return True

open = TarFile.open