blob: 8bce5d007542b7704efc4e87cb22324a36c802ac [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
71NUL = "\0" # the null character
72BLOCKSIZE = 512 # length of processing blocks
73RECORDSIZE = BLOCKSIZE * 20 # length of records
74MAGIC = "ustar" # magic tar string
75VERSION = "00" # version number
76
77LENGTH_NAME = 100 # maximum length of a filename
78LENGTH_LINK = 100 # maximum length of a linkname
79LENGTH_PREFIX = 155 # maximum length of the prefix field
80MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
81
82REGTYPE = "0" # regular file
83AREGTYPE = "\0" # regular file
84LNKTYPE = "1" # link (inside tarfile)
85SYMTYPE = "2" # symbolic link
86CHRTYPE = "3" # character special device
87BLKTYPE = "4" # block special device
88DIRTYPE = "5" # directory
89FIFOTYPE = "6" # fifo special device
90CONTTYPE = "7" # contiguous file
91
92GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
93GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
94GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
99SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
101 CONTTYPE, CHRTYPE, BLKTYPE,
102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
103 GNUTYPE_SPARSE)
104
105REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
111S_IFLNK = 0120000 # symbolic link
112S_IFREG = 0100000 # regular file
113S_IFBLK = 0060000 # block device
114S_IFDIR = 0040000 # directory
115S_IFCHR = 0020000 # character device
116S_IFIFO = 0010000 # fifo
117
118TSUID = 04000 # set UID on execution
119TSGID = 02000 # set GID on execution
120TSVTX = 01000 # reserved
121
122TUREAD = 0400 # read by owner
123TUWRITE = 0200 # write by owner
124TUEXEC = 0100 # execute/search by owner
125TGREAD = 0040 # read by group
126TGWRITE = 0020 # write by group
127TGEXEC = 0010 # execute/search by group
128TOREAD = 0004 # read by other
129TOWRITE = 0002 # write by other
130TOEXEC = 0001 # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
135def nts(s):
136 """Convert a null-terminated string buffer to a python string.
137 """
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000138 return s.rstrip(NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
140def calc_chksum(buf):
141 """Calculate the checksum for a member's header. It's a simple addition
142 of all bytes, treating the chksum field as if filled with spaces.
143 buf is a 512 byte long string buffer which holds the header.
144 """
145 chk = 256 # chksum field is treated as blanks,
146 # so the initial value is 8 * ord(" ")
147 for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum
148 for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum
149 return chk
150
151def copyfileobj(src, dst, length=None):
152 """Copy length bytes from fileobj src to fileobj dst.
153 If length is None, copy the entire content.
154 """
155 if length == 0:
156 return
157 if length is None:
158 shutil.copyfileobj(src, dst)
159 return
160
161 BUFSIZE = 16 * 1024
162 blocks, remainder = divmod(length, BUFSIZE)
163 for b in xrange(blocks):
164 buf = src.read(BUFSIZE)
165 if len(buf) < BUFSIZE:
166 raise IOError, "end of file reached"
167 dst.write(buf)
168
169 if remainder != 0:
170 buf = src.read(remainder)
171 if len(buf) < remainder:
172 raise IOError, "end of file reached"
173 dst.write(buf)
174 return
175
176filemode_table = (
Andrew M. Kuchling8bc462f2004-10-20 11:48:42 +0000177 ((S_IFLNK, "l"),
178 (S_IFREG, "-"),
179 (S_IFBLK, "b"),
180 (S_IFDIR, "d"),
181 (S_IFCHR, "c"),
182 (S_IFIFO, "p")),
183
184 ((TUREAD, "r"),),
185 ((TUWRITE, "w"),),
186 ((TUEXEC|TSUID, "s"),
187 (TSUID, "S"),
188 (TUEXEC, "x")),
189
190 ((TGREAD, "r"),),
191 ((TGWRITE, "w"),),
192 ((TGEXEC|TSGID, "s"),
193 (TSGID, "S"),
194 (TGEXEC, "x")),
195
196 ((TOREAD, "r"),),
197 ((TOWRITE, "w"),),
198 ((TOEXEC|TSVTX, "t"),
199 (TSVTX, "T"),
200 (TOEXEC, "x"))
201)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000202
203def filemode(mode):
204 """Convert a file's mode to a string of the form
205 -rwxrwxrwx.
206 Used by TarFile.list()
207 """
Andrew M. Kuchling8bc462f2004-10-20 11:48:42 +0000208 perm = []
209 for table in filemode_table:
210 for bit, char in table:
211 if mode & bit == bit:
212 perm.append(char)
213 break
214 else:
215 perm.append("-")
216 return "".join(perm)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000217
218if os.sep != "/":
219 normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
220else:
221 normpath = os.path.normpath
222
223class TarError(Exception):
224 """Base exception."""
225 pass
226class ExtractError(TarError):
227 """General exception for extract errors."""
228 pass
229class ReadError(TarError):
230 """Exception for unreadble tar archives."""
231 pass
232class CompressionError(TarError):
233 """Exception for unavailable compression methods."""
234 pass
235class StreamError(TarError):
236 """Exception for unsupported operations on stream-like TarFiles."""
237 pass
238
239#---------------------------
240# internal stream interface
241#---------------------------
242class _LowLevelFile:
243 """Low-level file object. Supports reading and writing.
244 It is used instead of a regular file object for streaming
245 access.
246 """
247
248 def __init__(self, name, mode):
249 mode = {
250 "r": os.O_RDONLY,
251 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
252 }[mode]
253 if hasattr(os, "O_BINARY"):
254 mode |= os.O_BINARY
255 self.fd = os.open(name, mode)
256
257 def close(self):
258 os.close(self.fd)
259
260 def read(self, size):
261 return os.read(self.fd, size)
262
263 def write(self, s):
264 os.write(self.fd, s)
265
266class _Stream:
267 """Class that serves as an adapter between TarFile and
268 a stream-like object. The stream-like object only
269 needs to have a read() or write() method and is accessed
270 blockwise. Use of gzip or bzip2 compression is possible.
271 A stream-like object could be for example: sys.stdin,
272 sys.stdout, a socket, a tape device etc.
273
274 _Stream is intended to be used only internally.
275 """
276
277 def __init__(self, name, mode, type, fileobj, bufsize):
278 """Construct a _Stream object.
279 """
280 self._extfileobj = True
281 if fileobj is None:
282 fileobj = _LowLevelFile(name, mode)
283 self._extfileobj = False
284
285 self.name = name or ""
286 self.mode = mode
287 self.type = type
288 self.fileobj = fileobj
289 self.bufsize = bufsize
290 self.buf = ""
291 self.pos = 0L
292 self.closed = False
293
294 if type == "gz":
295 try:
296 import zlib
297 except ImportError:
298 raise CompressionError, "zlib module is not available"
299 self.zlib = zlib
300 self.crc = zlib.crc32("")
301 if mode == "r":
302 self._init_read_gz()
303 else:
304 self._init_write_gz()
305
306 if type == "bz2":
307 try:
308 import bz2
309 except ImportError:
310 raise CompressionError, "bz2 module is not available"
311 if mode == "r":
312 self.dbuf = ""
313 self.cmp = bz2.BZ2Decompressor()
314 else:
315 self.cmp = bz2.BZ2Compressor()
316
317 def __del__(self):
318 if not self.closed:
319 self.close()
320
321 def _init_write_gz(self):
322 """Initialize for writing with gzip compression.
323 """
324 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
325 -self.zlib.MAX_WBITS,
326 self.zlib.DEF_MEM_LEVEL,
327 0)
328 timestamp = struct.pack("<L", long(time.time()))
329 self.__write("\037\213\010\010%s\002\377" % timestamp)
330 if self.name.endswith(".gz"):
331 self.name = self.name[:-3]
332 self.__write(self.name + NUL)
333
334 def write(self, s):
335 """Write string s to the stream.
336 """
337 if self.type == "gz":
338 self.crc = self.zlib.crc32(s, self.crc)
339 self.pos += len(s)
340 if self.type != "tar":
341 s = self.cmp.compress(s)
342 self.__write(s)
343
344 def __write(self, s):
345 """Write string s to the stream if a whole new block
346 is ready to be written.
347 """
348 self.buf += s
349 while len(self.buf) > self.bufsize:
350 self.fileobj.write(self.buf[:self.bufsize])
351 self.buf = self.buf[self.bufsize:]
352
353 def close(self):
354 """Close the _Stream object. No operation should be
355 done on it afterwards.
356 """
357 if self.closed:
358 return
359
Martin v. Löwisc234a522004-08-22 21:28:33 +0000360 if self.mode == "w" and self.type != "tar":
361 self.buf += self.cmp.flush()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000362 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000363 self.fileobj.write(self.buf)
364 self.buf = ""
365 if self.type == "gz":
366 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000367 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000368
369 if not self._extfileobj:
370 self.fileobj.close()
371
372 self.closed = True
373
374 def _init_read_gz(self):
375 """Initialize for reading a gzip compressed fileobj.
376 """
377 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
378 self.dbuf = ""
379
380 # taken from gzip.GzipFile with some alterations
381 if self.__read(2) != "\037\213":
382 raise ReadError, "not a gzip file"
383 if self.__read(1) != "\010":
384 raise CompressionError, "unsupported compression method"
385
386 flag = ord(self.__read(1))
387 self.__read(6)
388
389 if flag & 4:
390 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
391 self.read(xlen)
392 if flag & 8:
393 while True:
394 s = self.__read(1)
395 if not s or s == NUL:
396 break
397 if flag & 16:
398 while True:
399 s = self.__read(1)
400 if not s or s == NUL:
401 break
402 if flag & 2:
403 self.__read(2)
404
405 def tell(self):
406 """Return the stream's file pointer position.
407 """
408 return self.pos
409
410 def seek(self, pos=0):
411 """Set the stream's file pointer to pos. Negative seeking
412 is forbidden.
413 """
414 if pos - self.pos >= 0:
415 blocks, remainder = divmod(pos - self.pos, self.bufsize)
416 for i in xrange(blocks):
417 self.read(self.bufsize)
418 self.read(remainder)
419 else:
420 raise StreamError, "seeking backwards is not allowed"
421 return self.pos
422
423 def read(self, size=None):
424 """Return the next size number of bytes from the stream.
425 If size is not defined, return all bytes of the stream
426 up to EOF.
427 """
428 if size is None:
429 t = []
430 while True:
431 buf = self._read(self.bufsize)
432 if not buf:
433 break
434 t.append(buf)
435 buf = "".join(t)
436 else:
437 buf = self._read(size)
438 self.pos += len(buf)
439 return buf
440
441 def _read(self, size):
442 """Return size bytes from the stream.
443 """
444 if self.type == "tar":
445 return self.__read(size)
446
447 c = len(self.dbuf)
448 t = [self.dbuf]
449 while c < size:
450 buf = self.__read(self.bufsize)
451 if not buf:
452 break
453 buf = self.cmp.decompress(buf)
454 t.append(buf)
455 c += len(buf)
456 t = "".join(t)
457 self.dbuf = t[size:]
458 return t[:size]
459
460 def __read(self, size):
461 """Return size bytes from stream. If internal buffer is empty,
462 read another block from the stream.
463 """
464 c = len(self.buf)
465 t = [self.buf]
466 while c < size:
467 buf = self.fileobj.read(self.bufsize)
468 if not buf:
469 break
470 t.append(buf)
471 c += len(buf)
472 t = "".join(t)
473 self.buf = t[size:]
474 return t[:size]
475# class _Stream
476
477#------------------------
478# Extraction file object
479#------------------------
480class ExFileObject(object):
481 """File-like object for reading an archive member.
482 Is returned by TarFile.extractfile(). Support for
483 sparse files included.
484 """
485
486 def __init__(self, tarfile, tarinfo):
487 self.fileobj = tarfile.fileobj
488 self.name = tarinfo.name
489 self.mode = "r"
490 self.closed = False
491 self.offset = tarinfo.offset_data
492 self.size = tarinfo.size
493 self.pos = 0L
494 self.linebuffer = ""
495 if tarinfo.issparse():
496 self.sparse = tarinfo.sparse
497 self.read = self._readsparse
498 else:
499 self.read = self._readnormal
500
501 def __read(self, size):
502 """Overloadable read method.
503 """
504 return self.fileobj.read(size)
505
506 def readline(self, size=-1):
507 """Read a line with approx. size. If size is negative,
508 read a whole line. readline() and read() must not
509 be mixed up (!).
510 """
511 if size < 0:
512 size = sys.maxint
513
514 nl = self.linebuffer.find("\n")
515 if nl >= 0:
516 nl = min(nl, size)
517 else:
518 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000519 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000520 buf = self.read(min(size, 100))
521 if not buf:
522 break
523 self.linebuffer += buf
524 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000525 nl = self.linebuffer.find("\n")
526 if nl == -1:
527 s = self.linebuffer
528 self.linebuffer = ""
529 return s
530 buf = self.linebuffer[:nl]
531 self.linebuffer = self.linebuffer[nl + 1:]
532 while buf[-1:] == "\r":
533 buf = buf[:-1]
534 return buf + "\n"
535
536 def readlines(self):
537 """Return a list with all (following) lines.
538 """
539 result = []
540 while True:
541 line = self.readline()
542 if not line: break
543 result.append(line)
544 return result
545
546 def _readnormal(self, size=None):
547 """Read operation for regular files.
548 """
549 if self.closed:
550 raise ValueError, "file is closed"
551 self.fileobj.seek(self.offset + self.pos)
552 bytesleft = self.size - self.pos
553 if size is None:
554 bytestoread = bytesleft
555 else:
556 bytestoread = min(size, bytesleft)
557 self.pos += bytestoread
558 return self.__read(bytestoread)
559
560 def _readsparse(self, size=None):
561 """Read operation for sparse files.
562 """
563 if self.closed:
564 raise ValueError, "file is closed"
565
566 if size is None:
567 size = self.size - self.pos
568
569 data = []
570 while size > 0:
571 buf = self._readsparsesection(size)
572 if not buf:
573 break
574 size -= len(buf)
575 data.append(buf)
576 return "".join(data)
577
578 def _readsparsesection(self, size):
579 """Read a single section of a sparse file.
580 """
581 section = self.sparse.find(self.pos)
582
583 if section is None:
584 return ""
585
586 toread = min(size, section.offset + section.size - self.pos)
587 if isinstance(section, _data):
588 realpos = section.realpos + self.pos - section.offset
589 self.pos += toread
590 self.fileobj.seek(self.offset + realpos)
591 return self.__read(toread)
592 else:
593 self.pos += toread
594 return NUL * toread
595
596 def tell(self):
597 """Return the current file position.
598 """
599 return self.pos
600
601 def seek(self, pos, whence=0):
602 """Seek to a position in the file.
603 """
604 self.linebuffer = ""
605 if whence == 0:
606 self.pos = min(max(pos, 0), self.size)
607 if whence == 1:
608 if pos < 0:
609 self.pos = max(self.pos + pos, 0)
610 else:
611 self.pos = min(self.pos + pos, self.size)
612 if whence == 2:
613 self.pos = max(min(self.size + pos, self.size), 0)
614
615 def close(self):
616 """Close the file object.
617 """
618 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000619
620 def __iter__(self):
621 """Get an iterator over the file object.
622 """
623 if self.closed:
624 raise ValueError("I/O operation on closed file")
625 return self
626
627 def next(self):
628 """Get the next item from the file iterator.
629 """
630 result = self.readline()
631 if not result:
632 raise StopIteration
633 return result
634
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000635#class ExFileObject
636
637#------------------
638# Exported Classes
639#------------------
640class TarInfo(object):
641 """Informational class which holds the details about an
642 archive member given by a tar header block.
643 TarInfo objects are returned by TarFile.getmember(),
644 TarFile.getmembers() and TarFile.gettarinfo() and are
645 usually created internally.
646 """
647
648 def __init__(self, name=""):
649 """Construct a TarInfo object. name is the optional name
650 of the member.
651 """
652
653 self.name = name # member name (dirnames must end with '/')
654 self.mode = 0666 # file permissions
655 self.uid = 0 # user id
656 self.gid = 0 # group id
657 self.size = 0 # file size
658 self.mtime = 0 # modification time
659 self.chksum = 0 # header checksum
660 self.type = REGTYPE # member type
661 self.linkname = "" # link name
662 self.uname = "user" # user name
663 self.gname = "group" # group name
664 self.devmajor = 0 #-
665 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
666 self.prefix = "" # prefix to filename or holding information
667 # about sparse files
668
669 self.offset = 0 # the tar header starts here
670 self.offset_data = 0 # the file's data starts here
671
672 def __repr__(self):
673 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
674
Guido van Rossum75b64e62005-01-16 00:16:11 +0000675 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000676 def frombuf(cls, buf):
677 """Construct a TarInfo object from a 512 byte string buffer.
678 """
679 tarinfo = cls()
Neal Norwitzd96d1012004-07-20 22:23:02 +0000680 tarinfo.name = nts(buf[0:100])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000681 tarinfo.mode = int(buf[100:108], 8)
682 tarinfo.uid = int(buf[108:116],8)
683 tarinfo.gid = int(buf[116:124],8)
Neal Norwitzd96d1012004-07-20 22:23:02 +0000684
685 # There are two possible codings for the size field we
686 # have to discriminate, see comment in tobuf() below.
687 if buf[124] != chr(0200):
688 tarinfo.size = long(buf[124:136], 8)
689 else:
690 tarinfo.size = 0L
691 for i in range(11):
692 tarinfo.size <<= 8
693 tarinfo.size += ord(buf[125 + i])
694
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000695 tarinfo.mtime = long(buf[136:148], 8)
696 tarinfo.chksum = int(buf[148:156], 8)
697 tarinfo.type = buf[156:157]
698 tarinfo.linkname = nts(buf[157:257])
699 tarinfo.uname = nts(buf[265:297])
700 tarinfo.gname = nts(buf[297:329])
701 try:
702 tarinfo.devmajor = int(buf[329:337], 8)
703 tarinfo.devminor = int(buf[337:345], 8)
704 except ValueError:
705 tarinfo.devmajor = tarinfo.devmajor = 0
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000706 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707
708 # The prefix field is used for filenames > 100 in
709 # the POSIX standard.
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000710 # name = prefix + '/' + name
711 if tarinfo.type != GNUTYPE_SPARSE:
712 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000713
714 # Directory names should have a '/' at the end.
715 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
716 tarinfo.name += "/"
717 return tarinfo
718
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000719 def tobuf(self):
720 """Return a tar header block as a 512 byte string.
721 """
Neal Norwitzd96d1012004-07-20 22:23:02 +0000722 # Prefer the size to be encoded as 11 octal ascii digits
723 # which is the most portable. If the size exceeds this
724 # limit (>= 8 GB), encode it as an 88-bit value which is
725 # a GNU tar feature.
726 if self.size <= MAXSIZE_MEMBER:
727 size = "%011o" % self.size
728 else:
729 s = self.size
730 size = ""
731 for i in range(11):
732 size = chr(s & 0377) + size
733 s >>= 8
734 size = chr(0200) + size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000735
736 # The following code was contributed by Detlef Lannert.
737 parts = []
738 for value, fieldsize in (
Neal Norwitzd96d1012004-07-20 22:23:02 +0000739 (self.name, 100),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000740 ("%07o" % (self.mode & 07777), 8),
741 ("%07o" % self.uid, 8),
742 ("%07o" % self.gid, 8),
Neal Norwitzd96d1012004-07-20 22:23:02 +0000743 (size, 12),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000744 ("%011o" % self.mtime, 12),
745 (" ", 8),
746 (self.type, 1),
747 (self.linkname, 100),
748 (MAGIC, 6),
749 (VERSION, 2),
750 (self.uname, 32),
751 (self.gname, 32),
752 ("%07o" % self.devmajor, 8),
753 ("%07o" % self.devminor, 8),
754 (self.prefix, 155)
755 ):
756 l = len(value)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000757 parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000758
759 buf = "".join(parts)
760 chksum = calc_chksum(buf)
761 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
762 buf += (BLOCKSIZE - len(buf)) * NUL
763 self.buf = buf
764 return buf
765
766 def isreg(self):
767 return self.type in REGULAR_TYPES
768 def isfile(self):
769 return self.isreg()
770 def isdir(self):
771 return self.type == DIRTYPE
772 def issym(self):
773 return self.type == SYMTYPE
774 def islnk(self):
775 return self.type == LNKTYPE
776 def ischr(self):
777 return self.type == CHRTYPE
778 def isblk(self):
779 return self.type == BLKTYPE
780 def isfifo(self):
781 return self.type == FIFOTYPE
782 def issparse(self):
783 return self.type == GNUTYPE_SPARSE
784 def isdev(self):
785 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
786# class TarInfo
787
788class TarFile(object):
789 """The TarFile Class provides an interface to tar archives.
790 """
791
792 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
793
794 dereference = False # If true, add content of linked file to the
795 # tar file, else the link.
796
797 ignore_zeros = False # If true, skips empty or invalid blocks and
798 # continues processing.
799
800 errorlevel = 0 # If 0, fatal errors only appear in debug
801 # messages (if debug >= 0). If > 0, errors
802 # are passed to the caller as exceptions.
803
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000804 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000805 # archives (no GNU extensions!)
806
807 fileobject = ExFileObject
808
809 def __init__(self, name=None, mode="r", fileobj=None):
810 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
811 read from an existing archive, 'a' to append data to an existing
812 file or 'w' to create a new file overwriting an existing one. `mode'
813 defaults to 'r'.
814 If `fileobj' is given, it is used for reading or writing data. If it
815 can be determined, `mode' is overridden by `fileobj's mode.
816 `fileobj' is not closed, when TarFile is closed.
817 """
818 self.name = name
819
820 if len(mode) > 1 or mode not in "raw":
821 raise ValueError, "mode must be 'r', 'a' or 'w'"
822 self._mode = mode
823 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
824
825 if not fileobj:
826 fileobj = file(self.name, self.mode)
827 self._extfileobj = False
828 else:
829 if self.name is None and hasattr(fileobj, "name"):
830 self.name = fileobj.name
831 if hasattr(fileobj, "mode"):
832 self.mode = fileobj.mode
833 self._extfileobj = True
834 self.fileobj = fileobj
835
836 # Init datastructures
837 self.closed = False
838 self.members = [] # list of members as TarInfo objects
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000839 self._loaded = False # flag if all members have been read
840 self.offset = 0L # current position in the archive file
841 self.inodes = {} # dictionary caching the inodes of
842 # archive members already added
843
844 if self._mode == "r":
845 self.firstmember = None
846 self.firstmember = self.next()
847
848 if self._mode == "a":
849 # Move to the end of the archive,
850 # before the first empty block.
851 self.firstmember = None
852 while True:
853 try:
854 tarinfo = self.next()
855 except ReadError:
856 self.fileobj.seek(0)
857 break
858 if tarinfo is None:
859 self.fileobj.seek(- BLOCKSIZE, 1)
860 break
861
862 if self._mode in "aw":
863 self._loaded = True
864
865 #--------------------------------------------------------------------------
866 # Below are the classmethods which act as alternate constructors to the
867 # TarFile class. The open() method is the only one that is needed for
868 # public use; it is the "super"-constructor and is able to select an
869 # adequate "sub"-constructor for a particular compression using the mapping
870 # from OPEN_METH.
871 #
872 # This concept allows one to subclass TarFile without losing the comfort of
873 # the super-constructor. A sub-constructor is registered and made available
874 # by adding it to the mapping in OPEN_METH.
875
Guido van Rossum75b64e62005-01-16 00:16:11 +0000876 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000877 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
878 """Open a tar archive for reading, writing or appending. Return
879 an appropriate TarFile class.
880
881 mode:
882 'r' open for reading with transparent compression
883 'r:' open for reading exclusively uncompressed
884 'r:gz' open for reading with gzip compression
885 'r:bz2' open for reading with bzip2 compression
886 'a' or 'a:' open for appending
887 'w' or 'w:' open for writing without compression
888 'w:gz' open for writing with gzip compression
889 'w:bz2' open for writing with bzip2 compression
890 'r|' open an uncompressed stream of tar blocks for reading
891 'r|gz' open a gzip compressed stream of tar blocks
892 'r|bz2' open a bzip2 compressed stream of tar blocks
893 'w|' open an uncompressed stream for writing
894 'w|gz' open a gzip compressed stream for writing
895 'w|bz2' open a bzip2 compressed stream for writing
896 """
897
898 if not name and not fileobj:
899 raise ValueError, "nothing to open"
900
901 if ":" in mode:
902 filemode, comptype = mode.split(":", 1)
903 filemode = filemode or "r"
904 comptype = comptype or "tar"
905
906 # Select the *open() function according to
907 # given compression.
908 if comptype in cls.OPEN_METH:
909 func = getattr(cls, cls.OPEN_METH[comptype])
910 else:
911 raise CompressionError, "unknown compression type %r" % comptype
912 return func(name, filemode, fileobj)
913
914 elif "|" in mode:
915 filemode, comptype = mode.split("|", 1)
916 filemode = filemode or "r"
917 comptype = comptype or "tar"
918
919 if filemode not in "rw":
920 raise ValueError, "mode must be 'r' or 'w'"
921
922 t = cls(name, filemode,
923 _Stream(name, filemode, comptype, fileobj, bufsize))
924 t._extfileobj = False
925 return t
926
927 elif mode == "r":
928 # Find out which *open() is appropriate for opening the file.
929 for comptype in cls.OPEN_METH:
930 func = getattr(cls, cls.OPEN_METH[comptype])
931 try:
932 return func(name, "r", fileobj)
933 except (ReadError, CompressionError):
934 continue
935 raise ReadError, "file could not be opened successfully"
936
937 elif mode in "aw":
938 return cls.taropen(name, mode, fileobj)
939
940 raise ValueError, "undiscernible mode"
941
Guido van Rossum75b64e62005-01-16 00:16:11 +0000942 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000943 def taropen(cls, name, mode="r", fileobj=None):
944 """Open uncompressed tar archive name for reading or writing.
945 """
946 if len(mode) > 1 or mode not in "raw":
947 raise ValueError, "mode must be 'r', 'a' or 'w'"
948 return cls(name, mode, fileobj)
949
Guido van Rossum75b64e62005-01-16 00:16:11 +0000950 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000951 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
952 """Open gzip compressed tar archive name for reading or writing.
953 Appending is not allowed.
954 """
955 if len(mode) > 1 or mode not in "rw":
956 raise ValueError, "mode must be 'r' or 'w'"
957
958 try:
959 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000960 gzip.GzipFile
961 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000962 raise CompressionError, "gzip module is not available"
963
964 pre, ext = os.path.splitext(name)
965 pre = os.path.basename(pre)
966 if ext == ".tgz":
967 ext = ".tar"
968 if ext == ".gz":
969 ext = ""
970 tarname = pre + ext
971
972 if fileobj is None:
973 fileobj = file(name, mode + "b")
974
975 if mode != "r":
976 name = tarname
977
978 try:
979 t = cls.taropen(tarname, mode,
980 gzip.GzipFile(name, mode, compresslevel, fileobj)
981 )
982 except IOError:
983 raise ReadError, "not a gzip file"
984 t._extfileobj = False
985 return t
986
Guido van Rossum75b64e62005-01-16 00:16:11 +0000987 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000988 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
989 """Open bzip2 compressed tar archive name for reading or writing.
990 Appending is not allowed.
991 """
992 if len(mode) > 1 or mode not in "rw":
993 raise ValueError, "mode must be 'r' or 'w'."
994
995 try:
996 import bz2
997 except ImportError:
998 raise CompressionError, "bz2 module is not available"
999
1000 pre, ext = os.path.splitext(name)
1001 pre = os.path.basename(pre)
1002 if ext == ".tbz2":
1003 ext = ".tar"
1004 if ext == ".bz2":
1005 ext = ""
1006 tarname = pre + ext
1007
1008 if fileobj is not None:
1009 raise ValueError, "no support for external file objects"
1010
1011 try:
1012 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
1013 except IOError:
1014 raise ReadError, "not a bzip2 file"
1015 t._extfileobj = False
1016 return t
1017
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001018 # All *open() methods are registered here.
1019 OPEN_METH = {
1020 "tar": "taropen", # uncompressed tar
1021 "gz": "gzopen", # gzip compressed tar
1022 "bz2": "bz2open" # bzip2 compressed tar
1023 }
1024
1025 #--------------------------------------------------------------------------
1026 # The public methods which TarFile provides:
1027
1028 def close(self):
1029 """Close the TarFile. In write-mode, two finishing zero blocks are
1030 appended to the archive.
1031 """
1032 if self.closed:
1033 return
1034
1035 if self._mode in "aw":
1036 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1037 self.offset += (BLOCKSIZE * 2)
1038 # fill up the end with zero-blocks
1039 # (like option -b20 for tar does)
1040 blocks, remainder = divmod(self.offset, RECORDSIZE)
1041 if remainder > 0:
1042 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1043
1044 if not self._extfileobj:
1045 self.fileobj.close()
1046 self.closed = True
1047
1048 def getmember(self, name):
1049 """Return a TarInfo object for member `name'. If `name' can not be
1050 found in the archive, KeyError is raised. If a member occurs more
1051 than once in the archive, its last occurence is assumed to be the
1052 most up-to-date version.
1053 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001054 tarinfo = self._getmember(name)
1055 if tarinfo is None:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001056 raise KeyError, "filename %r not found" % name
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001057 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001058
1059 def getmembers(self):
1060 """Return the members of the archive as a list of TarInfo objects. The
1061 list has the same order as the members in the archive.
1062 """
1063 self._check()
1064 if not self._loaded: # if we want to obtain a list of
1065 self._load() # all members, we first have to
1066 # scan the whole archive.
1067 return self.members
1068
1069 def getnames(self):
1070 """Return the members of the archive as a list of their names. It has
1071 the same order as the list returned by getmembers().
1072 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001073 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001074
1075 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1076 """Create a TarInfo object for either the file `name' or the file
1077 object `fileobj' (using os.fstat on its file descriptor). You can
1078 modify some of the TarInfo's attributes before you add it using
1079 addfile(). If given, `arcname' specifies an alternative name for the
1080 file in the archive.
1081 """
1082 self._check("aw")
1083
1084 # When fileobj is given, replace name by
1085 # fileobj's real name.
1086 if fileobj is not None:
1087 name = fileobj.name
1088
1089 # Building the name of the member in the archive.
1090 # Backward slashes are converted to forward slashes,
1091 # Absolute paths are turned to relative paths.
1092 if arcname is None:
1093 arcname = name
1094 arcname = normpath(arcname)
1095 drv, arcname = os.path.splitdrive(arcname)
1096 while arcname[0:1] == "/":
1097 arcname = arcname[1:]
1098
1099 # Now, fill the TarInfo object with
1100 # information specific for the file.
1101 tarinfo = TarInfo()
1102
1103 # Use os.stat or os.lstat, depending on platform
1104 # and if symlinks shall be resolved.
1105 if fileobj is None:
1106 if hasattr(os, "lstat") and not self.dereference:
1107 statres = os.lstat(name)
1108 else:
1109 statres = os.stat(name)
1110 else:
1111 statres = os.fstat(fileobj.fileno())
1112 linkname = ""
1113
1114 stmd = statres.st_mode
1115 if stat.S_ISREG(stmd):
1116 inode = (statres.st_ino, statres.st_dev)
1117 if inode in self.inodes and not self.dereference:
1118 # Is it a hardlink to an already
1119 # archived file?
1120 type = LNKTYPE
1121 linkname = self.inodes[inode]
1122 else:
1123 # The inode is added only if its valid.
1124 # For win32 it is always 0.
1125 type = REGTYPE
1126 if inode[0]:
1127 self.inodes[inode] = arcname
1128 elif stat.S_ISDIR(stmd):
1129 type = DIRTYPE
1130 if arcname[-1:] != "/":
1131 arcname += "/"
1132 elif stat.S_ISFIFO(stmd):
1133 type = FIFOTYPE
1134 elif stat.S_ISLNK(stmd):
1135 type = SYMTYPE
1136 linkname = os.readlink(name)
1137 elif stat.S_ISCHR(stmd):
1138 type = CHRTYPE
1139 elif stat.S_ISBLK(stmd):
1140 type = BLKTYPE
1141 else:
1142 return None
1143
1144 # Fill the TarInfo object with all
1145 # information we can get.
1146 tarinfo.name = arcname
1147 tarinfo.mode = stmd
1148 tarinfo.uid = statres.st_uid
1149 tarinfo.gid = statres.st_gid
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001150 if stat.S_ISDIR(stmd):
1151 # For a directory, the size must be 0
1152 tarinfo.size = 0
1153 else:
1154 tarinfo.size = statres.st_size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001155 tarinfo.mtime = statres.st_mtime
1156 tarinfo.type = type
1157 tarinfo.linkname = linkname
1158 if pwd:
1159 try:
1160 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1161 except KeyError:
1162 pass
1163 if grp:
1164 try:
1165 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1166 except KeyError:
1167 pass
1168
1169 if type in (CHRTYPE, BLKTYPE):
1170 if hasattr(os, "major") and hasattr(os, "minor"):
1171 tarinfo.devmajor = os.major(statres.st_rdev)
1172 tarinfo.devminor = os.minor(statres.st_rdev)
1173 return tarinfo
1174
1175 def list(self, verbose=True):
1176 """Print a table of contents to sys.stdout. If `verbose' is False, only
1177 the names of the members are printed. If it is True, an `ls -l'-like
1178 output is produced.
1179 """
1180 self._check()
1181
1182 for tarinfo in self:
1183 if verbose:
1184 print filemode(tarinfo.mode),
1185 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1186 tarinfo.gname or tarinfo.gid),
1187 if tarinfo.ischr() or tarinfo.isblk():
1188 print "%10s" % ("%d,%d" \
1189 % (tarinfo.devmajor, tarinfo.devminor)),
1190 else:
1191 print "%10d" % tarinfo.size,
1192 print "%d-%02d-%02d %02d:%02d:%02d" \
1193 % time.localtime(tarinfo.mtime)[:6],
1194
1195 print tarinfo.name,
1196
1197 if verbose:
1198 if tarinfo.issym():
1199 print "->", tarinfo.linkname,
1200 if tarinfo.islnk():
1201 print "link to", tarinfo.linkname,
1202 print
1203
1204 def add(self, name, arcname=None, recursive=True):
1205 """Add the file `name' to the archive. `name' may be any type of file
1206 (directory, fifo, symbolic link, etc.). If given, `arcname'
1207 specifies an alternative name for the file in the archive.
1208 Directories are added recursively by default. This can be avoided by
1209 setting `recursive' to False.
1210 """
1211 self._check("aw")
1212
1213 if arcname is None:
1214 arcname = name
1215
1216 # Skip if somebody tries to archive the archive...
1217 if self.name is not None \
1218 and os.path.abspath(name) == os.path.abspath(self.name):
1219 self._dbg(2, "tarfile: Skipped %r" % name)
1220 return
1221
1222 # Special case: The user wants to add the current
1223 # working directory.
1224 if name == ".":
1225 if recursive:
1226 if arcname == ".":
1227 arcname = ""
1228 for f in os.listdir("."):
1229 self.add(f, os.path.join(arcname, f))
1230 return
1231
1232 self._dbg(1, name)
1233
1234 # Create a TarInfo object from the file.
1235 tarinfo = self.gettarinfo(name, arcname)
1236
1237 if tarinfo is None:
1238 self._dbg(1, "tarfile: Unsupported type %r" % name)
1239 return
1240
1241 # Append the tar header and data to the archive.
1242 if tarinfo.isreg():
1243 f = file(name, "rb")
1244 self.addfile(tarinfo, f)
1245 f.close()
1246
1247 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1248 tarinfo.size = 0L
1249 self.addfile(tarinfo)
1250
1251 if tarinfo.isdir():
1252 self.addfile(tarinfo)
1253 if recursive:
1254 for f in os.listdir(name):
1255 self.add(os.path.join(name, f), os.path.join(arcname, f))
1256
1257 def addfile(self, tarinfo, fileobj=None):
1258 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1259 given, tarinfo.size bytes are read from it and added to the archive.
1260 You can create TarInfo objects using gettarinfo().
1261 On Windows platforms, `fileobj' should always be opened with mode
1262 'rb' to avoid irritation about the file size.
1263 """
1264 self._check("aw")
1265
1266 tarinfo.name = normpath(tarinfo.name)
1267 if tarinfo.isdir():
1268 # directories should end with '/'
1269 tarinfo.name += "/"
1270
1271 if tarinfo.linkname:
1272 tarinfo.linkname = normpath(tarinfo.linkname)
1273
1274 if tarinfo.size > MAXSIZE_MEMBER:
Neal Norwitzd96d1012004-07-20 22:23:02 +00001275 if self.posix:
1276 raise ValueError, "file is too large (>= 8 GB)"
1277 else:
1278 self._dbg(2, "tarfile: Created GNU tar largefile header")
1279
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001280
1281 if len(tarinfo.linkname) > LENGTH_LINK:
1282 if self.posix:
1283 raise ValueError, "linkname is too long (>%d)" \
1284 % (LENGTH_LINK)
1285 else:
1286 self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
1287 tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
1288 self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")
1289
1290 if len(tarinfo.name) > LENGTH_NAME:
1291 if self.posix:
1292 prefix = tarinfo.name[:LENGTH_PREFIX + 1]
1293 while prefix and prefix[-1] != "/":
Tim Peters2c60f7a2003-01-29 03:49:43 +00001294 prefix = prefix[:-1]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001295
1296 name = tarinfo.name[len(prefix):]
1297 prefix = prefix[:-1]
1298
1299 if not prefix or len(name) > LENGTH_NAME:
1300 raise ValueError, "name is too long (>%d)" \
1301 % (LENGTH_NAME)
1302
1303 tarinfo.name = name
1304 tarinfo.prefix = prefix
1305 else:
1306 self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
1307 tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
1308 self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
1309
1310 self.fileobj.write(tarinfo.tobuf())
1311 self.offset += BLOCKSIZE
1312
1313 # If there's data to follow, append it.
1314 if fileobj is not None:
1315 copyfileobj(fileobj, self.fileobj, tarinfo.size)
1316 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1317 if remainder > 0:
1318 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1319 blocks += 1
1320 self.offset += blocks * BLOCKSIZE
1321
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001322 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001323
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001324 def extractall(self, path=".", members=None):
1325 """Extract all members from the archive to the current working
1326 directory and set owner, modification time and permissions on
1327 directories afterwards. `path' specifies a different directory
1328 to extract to. `members' is optional and must be a subset of the
1329 list returned by getmembers().
1330 """
1331 directories = []
1332
1333 if members is None:
1334 members = self
1335
1336 for tarinfo in members:
1337 if tarinfo.isdir():
1338 # Extract directory with a safe mode, so that
1339 # all files below can be extracted as well.
1340 try:
1341 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1342 except EnvironmentError:
1343 pass
1344 directories.append(tarinfo)
1345 else:
1346 self.extract(tarinfo, path)
1347
1348 # Reverse sort directories.
1349 directories.sort(lambda a, b: cmp(a.name, b.name))
1350 directories.reverse()
1351
1352 # Set correct owner, mtime and filemode on directories.
1353 for tarinfo in directories:
1354 path = os.path.join(path, tarinfo.name)
1355 try:
1356 self.chown(tarinfo, path)
1357 self.utime(tarinfo, path)
1358 self.chmod(tarinfo, path)
1359 except ExtractError, e:
1360 if self.errorlevel > 1:
1361 raise
1362 else:
1363 self._dbg(1, "tarfile: %s" % e)
1364
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001365 def extract(self, member, path=""):
1366 """Extract a member from the archive to the current working directory,
1367 using its full name. Its file information is extracted as accurately
1368 as possible. `member' may be a filename or a TarInfo object. You can
1369 specify a different directory using `path'.
1370 """
1371 self._check("r")
1372
1373 if isinstance(member, TarInfo):
1374 tarinfo = member
1375 else:
1376 tarinfo = self.getmember(member)
1377
Neal Norwitza4f651a2004-07-20 22:07:44 +00001378 # Prepare the link target for makelink().
1379 if tarinfo.islnk():
1380 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1381
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001382 try:
1383 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1384 except EnvironmentError, e:
1385 if self.errorlevel > 0:
1386 raise
1387 else:
1388 if e.filename is None:
1389 self._dbg(1, "tarfile: %s" % e.strerror)
1390 else:
1391 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1392 except ExtractError, e:
1393 if self.errorlevel > 1:
1394 raise
1395 else:
1396 self._dbg(1, "tarfile: %s" % e)
1397
1398 def extractfile(self, member):
1399 """Extract a member from the archive as a file object. `member' may be
1400 a filename or a TarInfo object. If `member' is a regular file, a
1401 file-like object is returned. If `member' is a link, a file-like
1402 object is constructed from the link's target. If `member' is none of
1403 the above, None is returned.
1404 The file-like object is read-only and provides the following
1405 methods: read(), readline(), readlines(), seek() and tell()
1406 """
1407 self._check("r")
1408
1409 if isinstance(member, TarInfo):
1410 tarinfo = member
1411 else:
1412 tarinfo = self.getmember(member)
1413
1414 if tarinfo.isreg():
1415 return self.fileobject(self, tarinfo)
1416
1417 elif tarinfo.type not in SUPPORTED_TYPES:
1418 # If a member's type is unknown, it is treated as a
1419 # regular file.
1420 return self.fileobject(self, tarinfo)
1421
1422 elif tarinfo.islnk() or tarinfo.issym():
1423 if isinstance(self.fileobj, _Stream):
1424 # A small but ugly workaround for the case that someone tries
1425 # to extract a (sym)link as a file-object from a non-seekable
1426 # stream of tar blocks.
1427 raise StreamError, "cannot extract (sym)link as file object"
1428 else:
1429 # A (sym)link's file object is it's target's file object.
1430 return self.extractfile(self._getmember(tarinfo.linkname,
1431 tarinfo))
1432 else:
1433 # If there's no data associated with the member (directory, chrdev,
1434 # blkdev, etc.), return None instead of a file object.
1435 return None
1436
1437 def _extract_member(self, tarinfo, targetpath):
1438 """Extract the TarInfo object tarinfo to a physical
1439 file called targetpath.
1440 """
1441 # Fetch the TarInfo object for the given name
1442 # and build the destination pathname, replacing
1443 # forward slashes to platform specific separators.
1444 if targetpath[-1:] == "/":
1445 targetpath = targetpath[:-1]
1446 targetpath = os.path.normpath(targetpath)
1447
1448 # Create all upper directories.
1449 upperdirs = os.path.dirname(targetpath)
1450 if upperdirs and not os.path.exists(upperdirs):
1451 ti = TarInfo()
1452 ti.name = upperdirs
1453 ti.type = DIRTYPE
1454 ti.mode = 0777
1455 ti.mtime = tarinfo.mtime
1456 ti.uid = tarinfo.uid
1457 ti.gid = tarinfo.gid
1458 ti.uname = tarinfo.uname
1459 ti.gname = tarinfo.gname
1460 try:
1461 self._extract_member(ti, ti.name)
1462 except:
1463 pass
1464
1465 if tarinfo.islnk() or tarinfo.issym():
1466 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1467 else:
1468 self._dbg(1, tarinfo.name)
1469
1470 if tarinfo.isreg():
1471 self.makefile(tarinfo, targetpath)
1472 elif tarinfo.isdir():
1473 self.makedir(tarinfo, targetpath)
1474 elif tarinfo.isfifo():
1475 self.makefifo(tarinfo, targetpath)
1476 elif tarinfo.ischr() or tarinfo.isblk():
1477 self.makedev(tarinfo, targetpath)
1478 elif tarinfo.islnk() or tarinfo.issym():
1479 self.makelink(tarinfo, targetpath)
1480 elif tarinfo.type not in SUPPORTED_TYPES:
1481 self.makeunknown(tarinfo, targetpath)
1482 else:
1483 self.makefile(tarinfo, targetpath)
1484
1485 self.chown(tarinfo, targetpath)
1486 if not tarinfo.issym():
1487 self.chmod(tarinfo, targetpath)
1488 self.utime(tarinfo, targetpath)
1489
1490 #--------------------------------------------------------------------------
1491 # Below are the different file methods. They are called via
1492 # _extract_member() when extract() is called. They can be replaced in a
1493 # subclass to implement other functionality.
1494
1495 def makedir(self, tarinfo, targetpath):
1496 """Make a directory called targetpath.
1497 """
1498 try:
1499 os.mkdir(targetpath)
1500 except EnvironmentError, e:
1501 if e.errno != errno.EEXIST:
1502 raise
1503
1504 def makefile(self, tarinfo, targetpath):
1505 """Make a file called targetpath.
1506 """
1507 source = self.extractfile(tarinfo)
1508 target = file(targetpath, "wb")
1509 copyfileobj(source, target)
1510 source.close()
1511 target.close()
1512
1513 def makeunknown(self, tarinfo, targetpath):
1514 """Make a file from a TarInfo object with an unknown type
1515 at targetpath.
1516 """
1517 self.makefile(tarinfo, targetpath)
1518 self._dbg(1, "tarfile: Unknown file type %r, " \
1519 "extracted as regular file." % tarinfo.type)
1520
1521 def makefifo(self, tarinfo, targetpath):
1522 """Make a fifo called targetpath.
1523 """
1524 if hasattr(os, "mkfifo"):
1525 os.mkfifo(targetpath)
1526 else:
1527 raise ExtractError, "fifo not supported by system"
1528
1529 def makedev(self, tarinfo, targetpath):
1530 """Make a character or block device called targetpath.
1531 """
1532 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
1533 raise ExtractError, "special devices not supported by system"
1534
1535 mode = tarinfo.mode
1536 if tarinfo.isblk():
1537 mode |= stat.S_IFBLK
1538 else:
1539 mode |= stat.S_IFCHR
1540
1541 os.mknod(targetpath, mode,
1542 os.makedev(tarinfo.devmajor, tarinfo.devminor))
1543
1544 def makelink(self, tarinfo, targetpath):
1545 """Make a (symbolic) link called targetpath. If it cannot be created
1546 (platform limitation), we try to make a copy of the referenced file
1547 instead of a link.
1548 """
1549 linkpath = tarinfo.linkname
1550 try:
1551 if tarinfo.issym():
1552 os.symlink(linkpath, targetpath)
1553 else:
Neal Norwitza4f651a2004-07-20 22:07:44 +00001554 # See extract().
1555 os.link(tarinfo._link_target, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001556 except AttributeError:
1557 if tarinfo.issym():
1558 linkpath = os.path.join(os.path.dirname(tarinfo.name),
1559 linkpath)
1560 linkpath = normpath(linkpath)
1561
1562 try:
1563 self._extract_member(self.getmember(linkpath), targetpath)
1564 except (EnvironmentError, KeyError), e:
1565 linkpath = os.path.normpath(linkpath)
1566 try:
1567 shutil.copy2(linkpath, targetpath)
1568 except EnvironmentError, e:
1569 raise IOError, "link could not be created"
1570
1571 def chown(self, tarinfo, targetpath):
1572 """Set owner of targetpath according to tarinfo.
1573 """
1574 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1575 # We have to be root to do so.
1576 try:
1577 g = grp.getgrnam(tarinfo.gname)[2]
1578 except KeyError:
1579 try:
1580 g = grp.getgrgid(tarinfo.gid)[2]
1581 except KeyError:
1582 g = os.getgid()
1583 try:
1584 u = pwd.getpwnam(tarinfo.uname)[2]
1585 except KeyError:
1586 try:
1587 u = pwd.getpwuid(tarinfo.uid)[2]
1588 except KeyError:
1589 u = os.getuid()
1590 try:
1591 if tarinfo.issym() and hasattr(os, "lchown"):
1592 os.lchown(targetpath, u, g)
1593 else:
Andrew MacIntyre7970d202003-02-19 12:51:34 +00001594 if sys.platform != "os2emx":
1595 os.chown(targetpath, u, g)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001596 except EnvironmentError, e:
1597 raise ExtractError, "could not change owner"
1598
1599 def chmod(self, tarinfo, targetpath):
1600 """Set file permissions of targetpath according to tarinfo.
1601 """
Jack Jansen834eff62003-03-07 12:47:06 +00001602 if hasattr(os, 'chmod'):
1603 try:
1604 os.chmod(targetpath, tarinfo.mode)
1605 except EnvironmentError, e:
1606 raise ExtractError, "could not change mode"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001607
1608 def utime(self, tarinfo, targetpath):
1609 """Set modification time of targetpath according to tarinfo.
1610 """
Jack Jansen834eff62003-03-07 12:47:06 +00001611 if not hasattr(os, 'utime'):
Tim Petersf9347782003-03-07 15:36:41 +00001612 return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001613 if sys.platform == "win32" and tarinfo.isdir():
1614 # According to msdn.microsoft.com, it is an error (EACCES)
1615 # to use utime() on directories.
1616 return
1617 try:
1618 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1619 except EnvironmentError, e:
1620 raise ExtractError, "could not change modification time"
1621
1622 #--------------------------------------------------------------------------
1623
1624 def next(self):
1625 """Return the next member of the archive as a TarInfo object, when
1626 TarFile is opened for reading. Return None if there is no more
1627 available.
1628 """
1629 self._check("ra")
1630 if self.firstmember is not None:
1631 m = self.firstmember
1632 self.firstmember = None
1633 return m
1634
1635 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001636 self.fileobj.seek(self.offset)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001637 while True:
1638 buf = self.fileobj.read(BLOCKSIZE)
1639 if not buf:
1640 return None
1641 try:
1642 tarinfo = TarInfo.frombuf(buf)
1643 except ValueError:
1644 if self.ignore_zeros:
1645 if buf.count(NUL) == BLOCKSIZE:
1646 adj = "empty"
1647 else:
1648 adj = "invalid"
1649 self._dbg(2, "0x%X: %s block" % (self.offset, adj))
1650 self.offset += BLOCKSIZE
1651 continue
1652 else:
1653 # Block is empty or unreadable.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001654 if self.offset == 0:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001655 # If the first block is invalid. That does not
1656 # look like a tar archive we can handle.
1657 raise ReadError,"empty, unreadable or compressed file"
1658 return None
1659 break
1660
1661 # We shouldn't rely on this checksum, because some tar programs
1662 # calculate it differently and it is merely validating the
1663 # header block. We could just as well skip this part, which would
1664 # have a slight effect on performance...
1665 if tarinfo.chksum != calc_chksum(buf):
1666 self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
1667
1668 # Set the TarInfo object's offset to the current position of the
1669 # TarFile and set self.offset to the position where the data blocks
1670 # should begin.
1671 tarinfo.offset = self.offset
1672 self.offset += BLOCKSIZE
1673
1674 # Check if the TarInfo object has a typeflag for which a callback
1675 # method is registered in the TYPE_METH. If so, then call it.
1676 if tarinfo.type in self.TYPE_METH:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001677 return self.TYPE_METH[tarinfo.type](self, tarinfo)
1678
1679 tarinfo.offset_data = self.offset
1680 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
1681 # Skip the following data blocks.
1682 self.offset += self._block(tarinfo.size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001683
1684 if tarinfo.isreg() and tarinfo.name[:-1] == "/":
1685 # some old tar programs don't know DIRTYPE
1686 tarinfo.type = DIRTYPE
1687
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001688 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001689 return tarinfo
1690
1691 #--------------------------------------------------------------------------
1692 # Below are some methods which are called for special typeflags in the
1693 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1694 # are registered in TYPE_METH below. You can register your own methods
1695 # with this mapping.
1696 # A registered method is called with a TarInfo object as only argument.
1697 #
1698 # During its execution the method MUST perform the following tasks:
1699 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1700 # if there is data to follow.
1701 # 2. set self.offset to the position where the next member's header will
1702 # begin.
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001703 # 3. append the tarinfo object to self.members, if it is supposed to appear
1704 # as a member of the TarFile object.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001705 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001706
1707 def proc_gnulong(self, tarinfo):
1708 """Evaluate the blocks that hold a GNU longname
1709 or longlink member.
1710 """
1711 buf = ""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001712 count = tarinfo.size
1713 while count > 0:
1714 block = self.fileobj.read(BLOCKSIZE)
1715 buf += block
1716 self.offset += BLOCKSIZE
1717 count -= BLOCKSIZE
1718
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001719 # Fetch the next header
1720 next = self.next()
1721
1722 next.offset = tarinfo.offset
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001723 if tarinfo.type == GNUTYPE_LONGNAME:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001724 next.name = nts(buf)
1725 elif tarinfo.type == GNUTYPE_LONGLINK:
1726 next.linkname = nts(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001727
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001728 return next
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001729
1730 def proc_sparse(self, tarinfo):
1731 """Analyze a GNU sparse header plus extra headers.
1732 """
1733 buf = tarinfo.tobuf()
1734 sp = _ringbuffer()
1735 pos = 386
1736 lastpos = 0L
1737 realpos = 0L
1738 # There are 4 possible sparse structs in the
1739 # first header.
1740 for i in xrange(4):
1741 try:
1742 offset = int(buf[pos:pos + 12], 8)
1743 numbytes = int(buf[pos + 12:pos + 24], 8)
1744 except ValueError:
1745 break
1746 if offset > lastpos:
1747 sp.append(_hole(lastpos, offset - lastpos))
1748 sp.append(_data(offset, numbytes, realpos))
1749 realpos += numbytes
1750 lastpos = offset + numbytes
1751 pos += 24
1752
1753 isextended = ord(buf[482])
1754 origsize = int(buf[483:495], 8)
1755
1756 # If the isextended flag is given,
1757 # there are extra headers to process.
1758 while isextended == 1:
1759 buf = self.fileobj.read(BLOCKSIZE)
1760 self.offset += BLOCKSIZE
1761 pos = 0
1762 for i in xrange(21):
1763 try:
1764 offset = int(buf[pos:pos + 12], 8)
1765 numbytes = int(buf[pos + 12:pos + 24], 8)
1766 except ValueError:
1767 break
1768 if offset > lastpos:
1769 sp.append(_hole(lastpos, offset - lastpos))
1770 sp.append(_data(offset, numbytes, realpos))
1771 realpos += numbytes
1772 lastpos = offset + numbytes
1773 pos += 24
1774 isextended = ord(buf[504])
1775
1776 if lastpos < origsize:
1777 sp.append(_hole(lastpos, origsize - lastpos))
1778
1779 tarinfo.sparse = sp
1780
1781 tarinfo.offset_data = self.offset
1782 self.offset += self._block(tarinfo.size)
1783 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001784
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001785 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001786 return tarinfo
1787
1788 # The type mapping for the next() method. The keys are single character
1789 # strings, the typeflag. The values are methods which are called when
1790 # next() encounters such a typeflag.
1791 TYPE_METH = {
1792 GNUTYPE_LONGNAME: proc_gnulong,
1793 GNUTYPE_LONGLINK: proc_gnulong,
1794 GNUTYPE_SPARSE: proc_sparse
1795 }
1796
1797 #--------------------------------------------------------------------------
1798 # Little helper methods:
1799
1800 def _block(self, count):
1801 """Round up a byte count by BLOCKSIZE and return it,
1802 e.g. _block(834) => 1024.
1803 """
1804 blocks, remainder = divmod(count, BLOCKSIZE)
1805 if remainder:
1806 blocks += 1
1807 return blocks * BLOCKSIZE
1808
1809 def _getmember(self, name, tarinfo=None):
1810 """Find an archive member by name from bottom to top.
1811 If tarinfo is given, it is used as the starting point.
1812 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001813 # Ensure that all members have been loaded.
1814 members = self.getmembers()
1815
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001816 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001817 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001818 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001819 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001820
1821 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001822 if name == members[i].name:
1823 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001824
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001825 def _load(self):
1826 """Read through the entire archive file and look for readable
1827 members.
1828 """
1829 while True:
1830 tarinfo = self.next()
1831 if tarinfo is None:
1832 break
1833 self._loaded = True
1834
1835 def _check(self, mode=None):
1836 """Check if TarFile is still open, and if the operation's mode
1837 corresponds to TarFile's mode.
1838 """
1839 if self.closed:
1840 raise IOError, "%s is closed" % self.__class__.__name__
1841 if mode is not None and self._mode not in mode:
1842 raise IOError, "bad operation for mode %r" % self._mode
1843
1844 def __iter__(self):
1845 """Provide an iterator object.
1846 """
1847 if self._loaded:
1848 return iter(self.members)
1849 else:
1850 return TarIter(self)
1851
1852 def _create_gnulong(self, name, type):
1853 """Write a GNU longname/longlink member to the TarFile.
1854 It consists of an extended tar header, with the length
1855 of the longname as size, followed by data blocks,
1856 which contain the longname as a null terminated string.
1857 """
Neal Norwitz0662f8a2004-07-20 21:54:18 +00001858 name += NUL
1859
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001860 tarinfo = TarInfo()
1861 tarinfo.name = "././@LongLink"
1862 tarinfo.type = type
1863 tarinfo.mode = 0
1864 tarinfo.size = len(name)
1865
1866 # write extended header
1867 self.fileobj.write(tarinfo.tobuf())
Neal Norwitz0662f8a2004-07-20 21:54:18 +00001868 self.offset += BLOCKSIZE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001869 # write name blocks
1870 self.fileobj.write(name)
1871 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1872 if remainder > 0:
1873 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1874 blocks += 1
1875 self.offset += blocks * BLOCKSIZE
1876
1877 def _dbg(self, level, msg):
1878 """Write debugging output to sys.stderr.
1879 """
1880 if level <= self.debug:
1881 print >> sys.stderr, msg
1882# class TarFile
1883
1884class TarIter:
1885 """Iterator Class.
1886
1887 for tarinfo in TarFile(...):
1888 suite...
1889 """
1890
1891 def __init__(self, tarfile):
1892 """Construct a TarIter object.
1893 """
1894 self.tarfile = tarfile
Martin v. Löwis637431b2005-03-03 23:12:42 +00001895 self.index = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001896 def __iter__(self):
1897 """Return iterator object.
1898 """
1899 return self
1900 def next(self):
1901 """Return the next item using TarFile's next() method.
1902 When all members have been read, set TarFile as _loaded.
1903 """
Martin v. Löwis637431b2005-03-03 23:12:42 +00001904 # Fix for SF #1100429: Under rare circumstances it can
1905 # happen that getmembers() is called during iteration,
1906 # which will cause TarIter to stop prematurely.
1907 if not self.tarfile._loaded:
1908 tarinfo = self.tarfile.next()
1909 if not tarinfo:
1910 self.tarfile._loaded = True
1911 raise StopIteration
1912 else:
1913 try:
1914 tarinfo = self.tarfile.members[self.index]
1915 except IndexError:
1916 raise StopIteration
1917 self.index += 1
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001918 return tarinfo
1919
1920# Helper classes for sparse file support
1921class _section:
1922 """Base class for _data and _hole.
1923 """
1924 def __init__(self, offset, size):
1925 self.offset = offset
1926 self.size = size
1927 def __contains__(self, offset):
1928 return self.offset <= offset < self.offset + self.size
1929
1930class _data(_section):
1931 """Represent a data section in a sparse file.
1932 """
1933 def __init__(self, offset, size, realpos):
1934 _section.__init__(self, offset, size)
1935 self.realpos = realpos
1936
1937class _hole(_section):
1938 """Represent a hole section in a sparse file.
1939 """
1940 pass
1941
1942class _ringbuffer(list):
1943 """Ringbuffer class which increases performance
1944 over a regular list.
1945 """
1946 def __init__(self):
1947 self.idx = 0
1948 def find(self, offset):
1949 idx = self.idx
1950 while True:
1951 item = self[idx]
1952 if offset in item:
1953 break
1954 idx += 1
1955 if idx == len(self):
1956 idx = 0
1957 if idx == self.idx:
1958 # End of File
1959 return None
1960 self.idx = idx
1961 return item
1962
1963#---------------------------------------------
1964# zipfile compatible TarFile class
1965#---------------------------------------------
1966TAR_PLAIN = 0 # zipfile.ZIP_STORED
1967TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
1968class TarFileCompat:
1969 """TarFile class compatible with standard module zipfile's
1970 ZipFile class.
1971 """
1972 def __init__(self, file, mode="r", compression=TAR_PLAIN):
1973 if compression == TAR_PLAIN:
1974 self.tarfile = TarFile.taropen(file, mode)
1975 elif compression == TAR_GZIPPED:
1976 self.tarfile = TarFile.gzopen(file, mode)
1977 else:
1978 raise ValueError, "unknown compression constant"
1979 if mode[0:1] == "r":
1980 members = self.tarfile.getmembers()
1981 for i in xrange(len(members)):
1982 m = members[i]
1983 m.filename = m.name
1984 m.file_size = m.size
1985 m.date_time = time.gmtime(m.mtime)[:6]
1986 def namelist(self):
1987 return map(lambda m: m.name, self.infolist())
1988 def infolist(self):
1989 return filter(lambda m: m.type in REGULAR_TYPES,
1990 self.tarfile.getmembers())
1991 def printdir(self):
1992 self.tarfile.list()
1993 def testzip(self):
1994 return
1995 def getinfo(self, name):
1996 return self.tarfile.getmember(name)
1997 def read(self, name):
1998 return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
1999 def write(self, filename, arcname=None, compress_type=None):
2000 self.tarfile.add(filename, arcname)
2001 def writestr(self, zinfo, bytes):
Raymond Hettingera6172712004-12-31 19:15:26 +00002002 try:
2003 from cStringIO import StringIO
2004 except ImportError:
2005 from StringIO import StringIO
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002006 import calendar
2007 zinfo.name = zinfo.filename
2008 zinfo.size = zinfo.file_size
2009 zinfo.mtime = calendar.timegm(zinfo.date_time)
Raymond Hettingera6172712004-12-31 19:15:26 +00002010 self.tarfile.addfile(zinfo, StringIO(bytes))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002011 def close(self):
2012 self.tarfile.close()
2013#class TarFileCompat
2014
2015#--------------------
2016# exported functions
2017#--------------------
2018def is_tarfile(name):
2019 """Return True if name points to a tar archive that we
2020 are able to handle, else return False.
2021 """
2022 try:
2023 t = open(name)
2024 t.close()
2025 return True
2026 except TarError:
2027 return False
2028
2029open = TarFile.open