blob: 32bb87efc8635c48b4e9a0fce8a5ed890cc5aff5 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
53try:
54 import grp, pwd
55except ImportError:
56 grp = pwd = None
57
58# from tarfile import *
59__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
60
61#---------------------------------------------------------
62# tar constants
63#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records; TarFile.close() pads
                                # written archives to a multiple of this
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a member header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files;
                                                # tested by TarInfo.isreg()

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits; filemode() maps them to the first character of
# the mode string.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (filemode() renders it as "t"/"T")

# Permission bits.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
124
125#---------------------------------------------------------
126# Some useful functions
127#---------------------------------------------------------
def nts(s):
    """Return the part of string buffer s that precedes its first
       null character, or all of s if it contains none.
    """
    end = s.find(NUL)
    if end < 0:
        return s
    return s[:end]
132
def calc_chksum(buf):
    """Calculate the checksum for a member's header.

       buf is the string buffer holding the header block. The result is
       the sum of the ordinals of all characters outside the chksum field
       (buf[148:156]); the eight characters of that field are counted as
       if they were spaces.
    """
    blanks = 8 * ord(" ")       # the chksum field, treated as blanks
    before = sum(map(ord, buf[:148]))
    after = sum(map(ord, buf[156:]))
    return blanks + before + after
143
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, everything up to the end of src is copied
       (delegated to shutil.copyfileobj). If src runs dry before length
       bytes have been copied, IOError is raised.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes, insisting that src delivers them all.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunksize = 16 * 1024
    fullchunks, tail = divmod(length, chunksize)
    done = 0
    while done < fullchunks:
        transfer(chunksize)
        done += 1
    if tail != 0:
        transfer(tail)
    return
168
# One row per character of the mode string. A row is a flat sequence of
# (bitmask, character) pairs; filemode() uses the first pair whose bits are
# all set in the mode. Masks that combine several bits therefore have to be
# listed before the masks they contain, otherwise they can never match
# (e.g. TUEXEC would always win over TUEXEC|TSUID).
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD,  "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD,  "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD,  "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       Each row of filemode_table contributes exactly one character:
       the character of the first matching pair, or "-" if no pair of
       the row matches.
    """
    chars = []
    for row in filemode_table:
        for i in range(0, len(row), 2):
            bits = row[i]
            if mode & bits == bits:
                chars.append(row[i + 1])
                break
        else:
            chars.append("-")
    return "".join(chars)
203
# Member names always use "/" as separator, whatever the platform uses.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert os.sep to forward slashes."""
        return os.path.normpath(path).replace(os.sep, "/")
208
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
224
225#---------------------------
226# internal stream interface
227#---------------------------
228class _LowLevelFile:
229 """Low-level file object. Supports reading and writing.
230 It is used instead of a regular file object for streaming
231 access.
232 """
233
234 def __init__(self, name, mode):
235 mode = {
236 "r": os.O_RDONLY,
237 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
238 }[mode]
239 if hasattr(os, "O_BINARY"):
240 mode |= os.O_BINARY
241 self.fd = os.open(name, mode)
242
243 def close(self):
244 os.close(self.fd)
245
246 def read(self, size):
247 return os.read(self.fd, size)
248
249 def write(self, s):
250 os.write(self.fd, s)
251
252class _Stream:
253 """Class that serves as an adapter between TarFile and
254 a stream-like object. The stream-like object only
255 needs to have a read() or write() method and is accessed
256 blockwise. Use of gzip or bzip2 compression is possible.
257 A stream-like object could be for example: sys.stdin,
258 sys.stdout, a socket, a tape device etc.
259
260 _Stream is intended to be used only internally.
261 """
262
263 def __init__(self, name, mode, type, fileobj, bufsize):
264 """Construct a _Stream object.
265 """
266 self._extfileobj = True
267 if fileobj is None:
268 fileobj = _LowLevelFile(name, mode)
269 self._extfileobj = False
270
271 self.name = name or ""
272 self.mode = mode
273 self.type = type
274 self.fileobj = fileobj
275 self.bufsize = bufsize
276 self.buf = ""
277 self.pos = 0L
278 self.closed = False
279
280 if type == "gz":
281 try:
282 import zlib
283 except ImportError:
284 raise CompressionError, "zlib module is not available"
285 self.zlib = zlib
286 self.crc = zlib.crc32("")
287 if mode == "r":
288 self._init_read_gz()
289 else:
290 self._init_write_gz()
291
292 if type == "bz2":
293 try:
294 import bz2
295 except ImportError:
296 raise CompressionError, "bz2 module is not available"
297 if mode == "r":
298 self.dbuf = ""
299 self.cmp = bz2.BZ2Decompressor()
300 else:
301 self.cmp = bz2.BZ2Compressor()
302
303 def __del__(self):
304 if not self.closed:
305 self.close()
306
307 def _init_write_gz(self):
308 """Initialize for writing with gzip compression.
309 """
310 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
311 -self.zlib.MAX_WBITS,
312 self.zlib.DEF_MEM_LEVEL,
313 0)
314 timestamp = struct.pack("<L", long(time.time()))
315 self.__write("\037\213\010\010%s\002\377" % timestamp)
316 if self.name.endswith(".gz"):
317 self.name = self.name[:-3]
318 self.__write(self.name + NUL)
319
320 def write(self, s):
321 """Write string s to the stream.
322 """
323 if self.type == "gz":
324 self.crc = self.zlib.crc32(s, self.crc)
325 self.pos += len(s)
326 if self.type != "tar":
327 s = self.cmp.compress(s)
328 self.__write(s)
329
330 def __write(self, s):
331 """Write string s to the stream if a whole new block
332 is ready to be written.
333 """
334 self.buf += s
335 while len(self.buf) > self.bufsize:
336 self.fileobj.write(self.buf[:self.bufsize])
337 self.buf = self.buf[self.bufsize:]
338
339 def close(self):
340 """Close the _Stream object. No operation should be
341 done on it afterwards.
342 """
343 if self.closed:
344 return
345
346 if self.mode == "w" and self.buf:
347 if self.type != "tar":
348 self.buf += self.cmp.flush()
349 self.fileobj.write(self.buf)
350 self.buf = ""
351 if self.type == "gz":
352 self.fileobj.write(struct.pack("<l", self.crc))
353 self.fileobj.write(struct.pack("<L", self.pos))
354
355 if not self._extfileobj:
356 self.fileobj.close()
357
358 self.closed = True
359
360 def _init_read_gz(self):
361 """Initialize for reading a gzip compressed fileobj.
362 """
363 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
364 self.dbuf = ""
365
366 # taken from gzip.GzipFile with some alterations
367 if self.__read(2) != "\037\213":
368 raise ReadError, "not a gzip file"
369 if self.__read(1) != "\010":
370 raise CompressionError, "unsupported compression method"
371
372 flag = ord(self.__read(1))
373 self.__read(6)
374
375 if flag & 4:
376 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
377 self.read(xlen)
378 if flag & 8:
379 while True:
380 s = self.__read(1)
381 if not s or s == NUL:
382 break
383 if flag & 16:
384 while True:
385 s = self.__read(1)
386 if not s or s == NUL:
387 break
388 if flag & 2:
389 self.__read(2)
390
391 def tell(self):
392 """Return the stream's file pointer position.
393 """
394 return self.pos
395
396 def seek(self, pos=0):
397 """Set the stream's file pointer to pos. Negative seeking
398 is forbidden.
399 """
400 if pos - self.pos >= 0:
401 blocks, remainder = divmod(pos - self.pos, self.bufsize)
402 for i in xrange(blocks):
403 self.read(self.bufsize)
404 self.read(remainder)
405 else:
406 raise StreamError, "seeking backwards is not allowed"
407 return self.pos
408
409 def read(self, size=None):
410 """Return the next size number of bytes from the stream.
411 If size is not defined, return all bytes of the stream
412 up to EOF.
413 """
414 if size is None:
415 t = []
416 while True:
417 buf = self._read(self.bufsize)
418 if not buf:
419 break
420 t.append(buf)
421 buf = "".join(t)
422 else:
423 buf = self._read(size)
424 self.pos += len(buf)
425 return buf
426
427 def _read(self, size):
428 """Return size bytes from the stream.
429 """
430 if self.type == "tar":
431 return self.__read(size)
432
433 c = len(self.dbuf)
434 t = [self.dbuf]
435 while c < size:
436 buf = self.__read(self.bufsize)
437 if not buf:
438 break
439 buf = self.cmp.decompress(buf)
440 t.append(buf)
441 c += len(buf)
442 t = "".join(t)
443 self.dbuf = t[size:]
444 return t[:size]
445
446 def __read(self, size):
447 """Return size bytes from stream. If internal buffer is empty,
448 read another block from the stream.
449 """
450 c = len(self.buf)
451 t = [self.buf]
452 while c < size:
453 buf = self.fileobj.read(self.bufsize)
454 if not buf:
455 break
456 t.append(buf)
457 c += len(buf)
458 t = "".join(t)
459 self.buf = t[size:]
460 return t[:size]
461# class _Stream
462
463#------------------------
464# Extraction file object
465#------------------------
466class ExFileObject(object):
467 """File-like object for reading an archive member.
468 Is returned by TarFile.extractfile(). Support for
469 sparse files included.
470 """
471
472 def __init__(self, tarfile, tarinfo):
473 self.fileobj = tarfile.fileobj
474 self.name = tarinfo.name
475 self.mode = "r"
476 self.closed = False
477 self.offset = tarinfo.offset_data
478 self.size = tarinfo.size
479 self.pos = 0L
480 self.linebuffer = ""
481 if tarinfo.issparse():
482 self.sparse = tarinfo.sparse
483 self.read = self._readsparse
484 else:
485 self.read = self._readnormal
486
487 def __read(self, size):
488 """Overloadable read method.
489 """
490 return self.fileobj.read(size)
491
492 def readline(self, size=-1):
493 """Read a line with approx. size. If size is negative,
494 read a whole line. readline() and read() must not
495 be mixed up (!).
496 """
497 if size < 0:
498 size = sys.maxint
499
500 nl = self.linebuffer.find("\n")
501 if nl >= 0:
502 nl = min(nl, size)
503 else:
504 size -= len(self.linebuffer)
505 while nl < 0:
506 buf = self.read(min(size, 100))
507 if not buf:
508 break
509 self.linebuffer += buf
510 size -= len(buf)
511 if size <= 0:
512 break
513 nl = self.linebuffer.find("\n")
514 if nl == -1:
515 s = self.linebuffer
516 self.linebuffer = ""
517 return s
518 buf = self.linebuffer[:nl]
519 self.linebuffer = self.linebuffer[nl + 1:]
520 while buf[-1:] == "\r":
521 buf = buf[:-1]
522 return buf + "\n"
523
524 def readlines(self):
525 """Return a list with all (following) lines.
526 """
527 result = []
528 while True:
529 line = self.readline()
530 if not line: break
531 result.append(line)
532 return result
533
534 def _readnormal(self, size=None):
535 """Read operation for regular files.
536 """
537 if self.closed:
538 raise ValueError, "file is closed"
539 self.fileobj.seek(self.offset + self.pos)
540 bytesleft = self.size - self.pos
541 if size is None:
542 bytestoread = bytesleft
543 else:
544 bytestoread = min(size, bytesleft)
545 self.pos += bytestoread
546 return self.__read(bytestoread)
547
548 def _readsparse(self, size=None):
549 """Read operation for sparse files.
550 """
551 if self.closed:
552 raise ValueError, "file is closed"
553
554 if size is None:
555 size = self.size - self.pos
556
557 data = []
558 while size > 0:
559 buf = self._readsparsesection(size)
560 if not buf:
561 break
562 size -= len(buf)
563 data.append(buf)
564 return "".join(data)
565
566 def _readsparsesection(self, size):
567 """Read a single section of a sparse file.
568 """
569 section = self.sparse.find(self.pos)
570
571 if section is None:
572 return ""
573
574 toread = min(size, section.offset + section.size - self.pos)
575 if isinstance(section, _data):
576 realpos = section.realpos + self.pos - section.offset
577 self.pos += toread
578 self.fileobj.seek(self.offset + realpos)
579 return self.__read(toread)
580 else:
581 self.pos += toread
582 return NUL * toread
583
584 def tell(self):
585 """Return the current file position.
586 """
587 return self.pos
588
589 def seek(self, pos, whence=0):
590 """Seek to a position in the file.
591 """
592 self.linebuffer = ""
593 if whence == 0:
594 self.pos = min(max(pos, 0), self.size)
595 if whence == 1:
596 if pos < 0:
597 self.pos = max(self.pos + pos, 0)
598 else:
599 self.pos = min(self.pos + pos, self.size)
600 if whence == 2:
601 self.pos = max(min(self.size + pos, self.size), 0)
602
603 def close(self):
604 """Close the file object.
605 """
606 self.closed = True
607#class ExFileObject
608
609#------------------
610# Exported Classes
611#------------------
612class TarInfo(object):
613 """Informational class which holds the details about an
614 archive member given by a tar header block.
615 TarInfo objects are returned by TarFile.getmember(),
616 TarFile.getmembers() and TarFile.gettarinfo() and are
617 usually created internally.
618 """
619
620 def __init__(self, name=""):
621 """Construct a TarInfo object. name is the optional name
622 of the member.
623 """
624
625 self.name = name # member name (dirnames must end with '/')
626 self.mode = 0666 # file permissions
627 self.uid = 0 # user id
628 self.gid = 0 # group id
629 self.size = 0 # file size
630 self.mtime = 0 # modification time
631 self.chksum = 0 # header checksum
632 self.type = REGTYPE # member type
633 self.linkname = "" # link name
634 self.uname = "user" # user name
635 self.gname = "group" # group name
636 self.devmajor = 0 #-
637 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
638 self.prefix = "" # prefix to filename or holding information
639 # about sparse files
640
641 self.offset = 0 # the tar header starts here
642 self.offset_data = 0 # the file's data starts here
643
644 def __repr__(self):
645 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
646
647 def frombuf(cls, buf):
648 """Construct a TarInfo object from a 512 byte string buffer.
649 """
650 tarinfo = cls()
651 tarinfo.name = nts(buf[0:100])
652 tarinfo.mode = int(buf[100:108], 8)
653 tarinfo.uid = int(buf[108:116],8)
654 tarinfo.gid = int(buf[116:124],8)
655 tarinfo.size = long(buf[124:136], 8)
656 tarinfo.mtime = long(buf[136:148], 8)
657 tarinfo.chksum = int(buf[148:156], 8)
658 tarinfo.type = buf[156:157]
659 tarinfo.linkname = nts(buf[157:257])
660 tarinfo.uname = nts(buf[265:297])
661 tarinfo.gname = nts(buf[297:329])
662 try:
663 tarinfo.devmajor = int(buf[329:337], 8)
664 tarinfo.devminor = int(buf[337:345], 8)
665 except ValueError:
666 tarinfo.devmajor = tarinfo.devmajor = 0
667
668 # The prefix field is used for filenames > 100 in
669 # the POSIX standard.
670 # name = prefix + "/" + name
671 prefix = buf[345:500]
672 while prefix and prefix[-1] == NUL:
673 prefix = prefix[:-1]
674 if len(prefix.split(NUL)) == 1:
675 tarinfo.prefix = prefix
676 tarinfo.name = normpath(os.path.join(tarinfo.prefix, tarinfo.name))
677 else:
678 tarinfo.prefix = buf[345:500]
679
680 # Directory names should have a '/' at the end.
681 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
682 tarinfo.name += "/"
683 return tarinfo
684
685 frombuf = classmethod(frombuf)
686
687 def tobuf(self):
688 """Return a tar header block as a 512 byte string.
689 """
690 name = self.name
691
692 # The following code was contributed by Detlef Lannert.
693 parts = []
694 for value, fieldsize in (
695 (name, 100),
696 ("%07o" % (self.mode & 07777), 8),
697 ("%07o" % self.uid, 8),
698 ("%07o" % self.gid, 8),
699 ("%011o" % self.size, 12),
700 ("%011o" % self.mtime, 12),
701 (" ", 8),
702 (self.type, 1),
703 (self.linkname, 100),
704 (MAGIC, 6),
705 (VERSION, 2),
706 (self.uname, 32),
707 (self.gname, 32),
708 ("%07o" % self.devmajor, 8),
709 ("%07o" % self.devminor, 8),
710 (self.prefix, 155)
711 ):
712 l = len(value)
713 parts.append(value + (fieldsize - l) * NUL)
714
715 buf = "".join(parts)
716 chksum = calc_chksum(buf)
717 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
718 buf += (BLOCKSIZE - len(buf)) * NUL
719 self.buf = buf
720 return buf
721
722 def isreg(self):
723 return self.type in REGULAR_TYPES
724 def isfile(self):
725 return self.isreg()
726 def isdir(self):
727 return self.type == DIRTYPE
728 def issym(self):
729 return self.type == SYMTYPE
730 def islnk(self):
731 return self.type == LNKTYPE
732 def ischr(self):
733 return self.type == CHRTYPE
734 def isblk(self):
735 return self.type == BLKTYPE
736 def isfifo(self):
737 return self.type == FIFOTYPE
738 def issparse(self):
739 return self.type == GNUTYPE_SPARSE
740 def isdev(self):
741 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
742# class TarInfo
743
744class TarFile(object):
745 """The TarFile Class provides an interface to tar archives.
746 """
747
748 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
749
750 dereference = False # If true, add content of linked file to the
751 # tar file, else the link.
752
753 ignore_zeros = False # If true, skips empty or invalid blocks and
754 # continues processing.
755
756 errorlevel = 0 # If 0, fatal errors only appear in debug
757 # messages (if debug >= 0). If > 0, errors
758 # are passed to the caller as exceptions.
759
760 posix = True # If True, generates POSIX.1-1990-compliant
761 # archives (no GNU extensions!)
762
763 fileobject = ExFileObject
764
765 def __init__(self, name=None, mode="r", fileobj=None):
766 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
767 read from an existing archive, 'a' to append data to an existing
768 file or 'w' to create a new file overwriting an existing one. `mode'
769 defaults to 'r'.
770 If `fileobj' is given, it is used for reading or writing data. If it
771 can be determined, `mode' is overridden by `fileobj's mode.
772 `fileobj' is not closed, when TarFile is closed.
773 """
774 self.name = name
775
776 if len(mode) > 1 or mode not in "raw":
777 raise ValueError, "mode must be 'r', 'a' or 'w'"
778 self._mode = mode
779 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
780
781 if not fileobj:
782 fileobj = file(self.name, self.mode)
783 self._extfileobj = False
784 else:
785 if self.name is None and hasattr(fileobj, "name"):
786 self.name = fileobj.name
787 if hasattr(fileobj, "mode"):
788 self.mode = fileobj.mode
789 self._extfileobj = True
790 self.fileobj = fileobj
791
792 # Init datastructures
793 self.closed = False
794 self.members = [] # list of members as TarInfo objects
795 self.membernames = [] # names of members
796 self.chunks = [0] # chunk cache
797 self._loaded = False # flag if all members have been read
798 self.offset = 0L # current position in the archive file
799 self.inodes = {} # dictionary caching the inodes of
800 # archive members already added
801
802 if self._mode == "r":
803 self.firstmember = None
804 self.firstmember = self.next()
805
806 if self._mode == "a":
807 # Move to the end of the archive,
808 # before the first empty block.
809 self.firstmember = None
810 while True:
811 try:
812 tarinfo = self.next()
813 except ReadError:
814 self.fileobj.seek(0)
815 break
816 if tarinfo is None:
817 self.fileobj.seek(- BLOCKSIZE, 1)
818 break
819
820 if self._mode in "aw":
821 self._loaded = True
822
823 #--------------------------------------------------------------------------
824 # Below are the classmethods which act as alternate constructors to the
825 # TarFile class. The open() method is the only one that is needed for
826 # public use; it is the "super"-constructor and is able to select an
827 # adequate "sub"-constructor for a particular compression using the mapping
828 # from OPEN_METH.
829 #
830 # This concept allows one to subclass TarFile without losing the comfort of
831 # the super-constructor. A sub-constructor is registered and made available
832 # by adding it to the mapping in OPEN_METH.
833
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r'          open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       bufsize is the block size used for the stream ('|') modes.

       Raises ValueError if neither name nor fileobj is given or the
       mode is invalid, CompressionError for an unknown compression
       type and ReadError if no method was able to read the archive.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # the _Stream was created here, so the TarFile has to close it
        t._extfileobj = False
        return t

    elif mode == "r":
        # Find out which *open() is appropriate for opening the file.
        # A failed attempt may have consumed data from an external
        # file object, so remember where it started and rewind before
        # the next method is tried (if the object allows that).
        rewind = None
        if fileobj is not None:
            try:
                start = fileobj.tell()
                rewind = fileobj.seek
            except (AttributeError, IOError):
                rewind = None

        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if rewind is not None:
                    rewind(start)
                continue
        raise ReadError("file could not be opened successfully")

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")

open = classmethod(open)
900
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.
       mode is one of 'r', 'a' and 'w'; ValueError is raised for
       anything else. Returns cls(name, mode, fileobj).
    """
    single_letter = len(mode) <= 1
    if not (single_letter and mode in "raw"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)

taropen = classmethod(taropen)
909
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for an invalid mode, CompressionError if the
       gzip module is not available and ReadError if the data is not
       gzip compressed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
    except ImportError:
        raise CompressionError("gzip module is not available")

    # Derive the name of the tar archive inside the gzip file:
    # foo.tgz -> foo.tar, foo.tar.gz -> foo.tar. There is nothing to
    # derive if only a file object was given.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure: do not leak the file opened above.
        if opened_here:
            fileobj.close()
        raise
    # the GzipFile was created here, so the TarFile has to close it
    t._extfileobj = False
    return t

gzopen = classmethod(gzopen)
946
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for an invalid mode or if fileobj is given
       (external file objects are not supported), CompressionError if
       the bz2 module is not available and ReadError if the file is
       not bzip2 compressed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    # Reject file objects before name is looked at: name may well be
    # None when a file object is passed.
    if fileobj is not None:
        raise ValueError("no support for external file objects")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # foo.tbz2 -> foo.tar, foo.tar.bz2 -> foo.tar
    pre, ext = os.path.splitext(name)
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    try:
        t = cls.taropen(tarname, mode,
                        bz2.BZ2File(name, mode, compresslevel=compresslevel))
    except IOError:
        raise ReadError("not a bzip2 file")
    # the BZ2File was created here, so the TarFile has to close it
    t._extfileobj = False
    return t

bz2open = classmethod(bz2open)
978
# All *open() methods are registered here.
# Maps the compression type used in the mode string of open() to the
# name of the classmethod that opens archives of that type. open() also
# iterates over this mapping when it has to find the right method by
# trial; a dictionary does not promise any particular order for that.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
985
986 #--------------------------------------------------------------------------
987 # The public methods which TarFile provides:
988
def close(self):
    """Close the TarFile. When the archive was opened for writing or
       appending, two zero blocks are written as end marker and the
       archive is padded with zeros to a multiple of RECORDSIZE.
       The underlying file object is closed unless it was passed in
       by the caller. Closing twice is harmless.
    """
    if self.closed:
        return

    if self._mode in "aw":
        endmarker = NUL * (2 * BLOCKSIZE)
        self.fileobj.write(endmarker)
        self.offset += len(endmarker)
        # fill up the last record with zeros
        # (like option -b20 for tar does)
        overhang = self.offset % RECORDSIZE
        if overhang > 0:
            self.fileobj.write(NUL * (RECORDSIZE - overhang))

    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1008
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    self._check()
    # The name may simply not have been read yet: scan the rest
    # of the archive before giving up.
    if not self._loaded and name not in self.membernames:
        self._load()
    if name in self.membernames:
        return self._getmember(name)
    raise KeyError("filename %r not found" % name)
1021
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been scanned.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1031
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    self._check()
    # Scan the rest of the archive first if that has not happened yet.
    if self._loaded:
        return self.membernames
    self._load()
    return self.membernames
1040
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.
       Returns None if the file is of a type that cannot be stored.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    drv, arcname = os.path.splitdrive(arcname)
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if (not self.dereference and inode in self.inodes
                and arcname != self.inodes[inode]):
            # Is it a hardlink to an already
            # archived file?
            # (A file that is added again under the same name must not
            # become a link to itself.)
            type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        type = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name  = arcname
    tarinfo.mode  = stmd
    tarinfo.uid   = statres.st_uid
    tarinfo.gid   = statres.st_gid
    # The size field tells how much data follows the header. Only
    # regular files carry data; for directories, links and devices
    # st_size does not describe anything stored in the archive.
    if type == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type  = type
    tarinfo.linkname = linkname
    # User and group names are looked up where the platform offers
    # pwd/grp; unknown ids keep the TarInfo defaults.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1136
def list(self, verbose=True):
    """Print a table of contents to sys.stdout.

       If `verbose' is False, only the names of the members are printed.
       If it is True, an `ls -l'-like line is produced for each member.
    """
    self._check()

    for tarinfo in self:
        # Collect the columns of one row, then emit them in one go.
        columns = []
        if verbose:
            columns.append(filemode(tarinfo.mode))
            columns.append("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                      tarinfo.gname or tarinfo.gid))
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show major,minor instead of a size.
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                columns.append("%10s" % device)
            else:
                columns.append("%10d" % tarinfo.size)
            columns.append("%d-%02d-%02d %02d:%02d:%02d"
                           % time.localtime(tarinfo.mtime)[:6])

        columns.append(tarinfo.name)

        if verbose:
            if tarinfo.issym():
                columns.extend(["->", tarinfo.linkname])
            if tarinfo.islnk():
                columns.extend(["link to", tarinfo.linkname])

        sys.stdout.write(" ".join(columns) + "\n")
1165
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive.

       `name' may be any type of file (directory, fifo, symbolic link,
       etc.). If given, `arcname' specifies an alternative name for the
       file in the archive. Directories are added recursively by default.
       This can be avoided by setting `recursive' to False.

       The archive itself and files of unsupported types are skipped;
       a debug message is issued in both cases.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current working
    # directory. No member is written for "." itself, only for its
    # contents (and only when recursing).
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # `file' is used on purpose: the module level name `open' is
        # bound to TarFile.open in this module.
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source even if writing the member failed.
            f.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        # These members consist of a header only.
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))
1218
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

       If `fileobj' is given, tarinfo.size bytes are read from it and
       added to the archive. TarInfo objects can be created using
       gettarinfo(). On Windows platforms, `fileobj' should always be
       opened with mode 'rb' to avoid irritation about the file size.

       ValueError is raised if the member is too large, or if a name or
       linkname does not fit into the header while self.posix is set.
       Note that `tarinfo' itself is modified (name, linkname, prefix).
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError("file is too large (>8GB)")

    # A linkname that does not fit into the header is either an error
    # (posix) or is stored in a preceding GNU longlink member.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last slash found within the first
            # LENGTH_PREFIX + 1 characters into prefix and name.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            prefix = head[:head.rfind("/") + 1]

            name = tarinfo.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # The header always occupies exactly one block.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and fill the last block
    # with NULs.
    if fileobj is not None:
        payload = tarinfo.size
        copyfileobj(fileobj, self.fileobj, payload)
        padding = (-payload) % BLOCKSIZE
        if padding > 0:
            self.fileobj.write(NUL * padding)
        self.offset += payload + padding

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
1283
def extract(self, member, path=""):
    """Extract a member from the archive to the current working
       directory, using its full name.

       Its file information is extracted as accurately as possible.
       `member' may be a filename or a TarInfo object. A different
       directory can be specified using `path'.

       Whether errors are raised or only reported as debug messages
       depends on self.errorlevel: EnvironmentError is re-raised for
       levels above 0, ExtractError for levels above 1.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # NOTE(review): the member name is joined onto `path' without any
    # check, so absolute names or ".." components in an archive can
    # place files outside of `path'.
    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination)
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1312
def extractfile(self, member):
    """Extract a member from the archive as a file object.

       `member' may be a filename or a TarInfo object. If `member' is a
       regular file, a file-like object is returned. If `member' is a
       link, a file-like object is constructed from the link's target.
       If `member' is none of the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()

       StreamError is raised when a link is requested while the archive
       is read from a _Stream object.
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Regular files, and members of unknown type, which are
        # treated like regular files.
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory,
        # chrdev, blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object. The
    # target is searched among the members preceding the link.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1351
1352 def _extract_member(self, tarinfo, targetpath):
1353 """Extract the TarInfo object tarinfo to a physical
1354 file called targetpath.
1355 """
1356 # Fetch the TarInfo object for the given name
1357 # and build the destination pathname, replacing
1358 # forward slashes to platform specific separators.
1359 if targetpath[-1:] == "/":
1360 targetpath = targetpath[:-1]
1361 targetpath = os.path.normpath(targetpath)
1362
1363 # Create all upper directories.
1364 upperdirs = os.path.dirname(targetpath)
1365 if upperdirs and not os.path.exists(upperdirs):
1366 ti = TarInfo()
1367 ti.name = upperdirs
1368 ti.type = DIRTYPE
1369 ti.mode = 0777
1370 ti.mtime = tarinfo.mtime
1371 ti.uid = tarinfo.uid
1372 ti.gid = tarinfo.gid
1373 ti.uname = tarinfo.uname
1374 ti.gname = tarinfo.gname
1375 try:
1376 self._extract_member(ti, ti.name)
1377 except:
1378 pass
1379
1380 if tarinfo.islnk() or tarinfo.issym():
1381 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1382 else:
1383 self._dbg(1, tarinfo.name)
1384
1385 if tarinfo.isreg():
1386 self.makefile(tarinfo, targetpath)
1387 elif tarinfo.isdir():
1388 self.makedir(tarinfo, targetpath)
1389 elif tarinfo.isfifo():
1390 self.makefifo(tarinfo, targetpath)
1391 elif tarinfo.ischr() or tarinfo.isblk():
1392 self.makedev(tarinfo, targetpath)
1393 elif tarinfo.islnk() or tarinfo.issym():
1394 self.makelink(tarinfo, targetpath)
1395 elif tarinfo.type not in SUPPORTED_TYPES:
1396 self.makeunknown(tarinfo, targetpath)
1397 else:
1398 self.makefile(tarinfo, targetpath)
1399
1400 self.chown(tarinfo, targetpath)
1401 if not tarinfo.issym():
1402 self.chmod(tarinfo, targetpath)
1403 self.utime(tarinfo, targetpath)
1404
1405 #--------------------------------------------------------------------------
1406 # Below are the different file methods. They are called via
1407 # _extract_member() when extract() is called. They can be replaced in a
1408 # subclass to implement other functionality.
1409
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

       A directory that exists already is not treated as an error; any
       other failure of os.mkdir() is passed on to the caller.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        failure = sys.exc_info()[1]
        if failure.errno != errno.EEXIST:
            raise
1418
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through extractfile() and copied to
       a newly created file. Both file objects are closed again, also if
       creating the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # `file' is used on purpose: the module level name `open' is
        # bound to TarFile.open in this module.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1427
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath.

       The member is written like a regular file and a debug message
       that names the unknown typeflag is issued afterwards.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
1435
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

       ExtractError is raised on platforms without os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1443
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

       ExtractError is raised on platforms that lack os.mknod() or
       os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # The file type bits have to be merged into the mode for mknod.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1458
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath.

       If it cannot be created (platform limitation), we try to make a
       copy of the referenced file instead of a link: first by extracting
       the referenced member, then by copying an existing file of that
       name. IOError is raised if that fails, too.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            os.link(linkpath, targetpath)
    except AttributeError:
        # os.symlink()/os.link() do not exist on this platform.
        if tarinfo.issym():
            # The target of a symbolic link is taken relative to the
            # directory of the link itself.
            base = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(base, linkpath))

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1484
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by numeric id; the ids of the current process are the
       last resort. ExtractError is raised if changing the owner fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to do so.
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1512
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       ExtractError is raised if os.chmod() fails.
    """
    permissions = tarinfo.mode
    try:
        os.chmod(targetpath, permissions)
    except EnvironmentError:
        raise ExtractError("could not change mode")
1520
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       Access time and modification time are both set to tarinfo.mtime.
       Directories are left alone on win32. ExtractError is raised if
       os.utime() fails.
    """
    if sys.platform == "win32" and tarinfo.isdir():
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        return

    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1532
1533 #--------------------------------------------------------------------------
1534
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised if already the first block of the archive is
       not a valid header (unless self.ignore_zeros is set, in which
       case invalid blocks are skipped).
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.chunks[-1])
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            # Block is empty or unreadable.
            if self.chunks[-1] == 0:
                # If the first block is invalid. That does not
                # look like a tar archive we can handle.
                raise ReadError("empty, unreadable or compressed file")
            return None
        else:
            break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
    else:
        tarinfo.offset_data = self.offset
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and store a directory
    # as a regular file whose name ends with a slash. The last
    # character of the name has to be tested for that.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
    return tarinfo
1603
1604 #--------------------------------------------------------------------------
1605 # Below are some methods which are called for special typeflags in the
1606 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1607 # are registered in TYPE_METH below. You can register your own methods
1608 # with this mapping.
1609 # A registered method is called with a TarInfo object as only argument.
1610 #
1611 # During its execution the method MUST perform the following tasks:
1612 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1613 # if there is data to follow.
1614 # 2. set self.offset to the position where the next member's header will
1615 # begin.
1616 # 3. return a valid TarInfo object.
1617
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member.

       `tarinfo' is the header of the longname/longlink member. Its data
       blocks contain the long name. The header that follows describes
       the member the name belongs to; that member is returned with its
       name (GNUTYPE_LONGNAME) or linkname (GNUTYPE_LONGLINK) replaced.
       The following header is processed like any other header, so that
       a longlink member followed by a longname member (a member whose
       name and linkname are both too long) is handled as well.
    """
    longtype = tarinfo.type

    # Read the data blocks that hold the long name.
    blocks = []
    count = tarinfo.size
    while count > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    longname = nts("".join(blocks))

    # Fetch the header that follows.
    buf = self.fileobj.read(BLOCKSIZE)

    tarinfo = TarInfo.frombuf(buf)
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    if tarinfo.type in self.TYPE_METH:
        # Another special member (e.g. the longname member after a
        # longlink member): let its own method process it. That method
        # sets offset_data and advances self.offset.
        tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
    else:
        tarinfo.offset_data = self.offset
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            self.offset += self._block(tarinfo.size)

    # Patch the member with the long name; an empty long name leaves
    # the value from the header untouched.
    if longtype == GNUTYPE_LONGNAME:
        tarinfo.name = longname or tarinfo.name
    elif longtype == GNUTYPE_LONGLINK:
        tarinfo.linkname = longname or tarinfo.linkname
    return tarinfo
1650
def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

       The sparse structs of the header (and of the extension headers
       that may follow it) are turned into a _ringbuffer of _data and
       _hole sections, which is stored as tarinfo.sparse. tarinfo.size
       is replaced by the size read from the header's byte range
       483:495, and self.offset is advanced past the member's data
       blocks. The updated `tarinfo' is returned.
    """
    # The header is re-serialized to get at its raw bytes.
    buf = tarinfo.tobuf()
    sp = _ringbuffer()
    pos = 386
    # lastpos: end of the previous data section within the file
    # contents; realpos: running total of the data bytes seen so far.
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        # Each struct consists of two 12 byte octal fields:
        # offset and numbytes.
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            # A field that is no octal number ends the list.
            break
        if offset > lastpos:
            # The gap up to this data section is a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    # NOTE(review): presumably the size of the expanded file; it
    # replaces tarinfo.size at the end of this method.
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extension block is scanned for up to 21 more structs.
        for i in xrange(21):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # Byte 504 of an extension block tells whether another
        # extension block follows.
        isextended = ord(buf[504])

    # Whatever lies between the last data section and origsize is a
    # trailing hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # tarinfo.size still holds the size from the header here; it is
    # used to skip the data blocks before being replaced.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize
    return tarinfo
1706
# Dispatch table for the next() method. Each key is a typeflag (a single
# character string); the value is the method that next() calls when it
# reads a header carrying that typeflag.
TYPE_METH = {
    GNUTYPE_SPARSE:   proc_sparse,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_LONGNAME: proc_gnulong,
}
1715
1716 #--------------------------------------------------------------------------
1717 # Little helper methods:
1718
def _block(self, count):
    """Round up a byte count to the next multiple of BLOCKSIZE and
       return it, e.g. _block(834) => 1024.
    """
    # Floor division of the count raised by BLOCKSIZE - 1 yields the
    # number of blocks that are at least partly used.
    return ((count + BLOCKSIZE - 1) // BLOCKSIZE) * BLOCKSIZE
1727
1728 def _getmember(self, name, tarinfo=None):
1729 """Find an archive member by name from bottom to top.
1730 If tarinfo is given, it is used as the starting point.
1731 """
1732 if tarinfo is None:
1733 end = len(self.members)
1734 else:
1735 end = self.members.index(tarinfo)
1736
1737 for i in xrange(end - 1, -1, -1):
1738 if name == self.membernames[i]:
1739 return self.members[i]
1740
1741 def _load(self):
1742 """Read through the entire archive file and look for readable
1743 members.
1744 """
1745 while True:
1746 tarinfo = self.next()
1747 if tarinfo is None:
1748 break
1749 self._loaded = True
1750
1751 def _check(self, mode=None):
1752 """Check if TarFile is still open, and if the operation's mode
1753 corresponds to TarFile's mode.
1754 """
1755 if self.closed:
1756 raise IOError, "%s is closed" % self.__class__.__name__
1757 if mode is not None and self._mode not in mode:
1758 raise IOError, "bad operation for mode %r" % self._mode
1759
1760 def __iter__(self):
1761 """Provide an iterator object.
1762 """
1763 if self._loaded:
1764 return iter(self.members)
1765 else:
1766 return TarIter(self)
1767
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

       It consists of an extended tar header, with the length of the
       longname (including its terminating NUL) as size, followed by
       data blocks, which contain the longname as a null terminated
       string. `type' is the typeflag of the member, GNUTYPE_LONGNAME or
       GNUTYPE_LONGLINK. self.offset is advanced past the header block
       and the data blocks.
    """
    # Terminate the name explicitly. The padding below adds no NUL if
    # the length is an exact multiple of BLOCKSIZE.
    name += NUL

    tarinfo = TarInfo()
    tarinfo.name = "././@LongLink"
    tarinfo.type = type
    tarinfo.mode = 0
    tarinfo.size = len(name)

    # write extended header; it occupies one block, which has to be
    # accounted for just like the data blocks
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks
    self.fileobj.write(name)
    blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
    if remainder > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1789
1790 def _dbg(self, level, msg):
1791 """Write debugging output to sys.stderr.
1792 """
1793 if level <= self.debug:
1794 print >> sys.stderr, msg
1795# class TarFile
1796
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the TarFile `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator object, i.e. the TarIter itself.
        """
        return self

    def next(self):
        """Return the next member using the TarFile's next() method.
           When there is none left, mark the TarFile as _loaded and
           raise StopIteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1821
1822# Helper classes for sparse file support
1823class _section:
1824 """Base class for _data and _hole.
1825 """
1826 def __init__(self, offset, size):
1827 self.offset = offset
1828 self.size = size
1829 def __contains__(self, offset):
1830 return self.offset <= offset < self.offset + self.size
1831
class _data(_section):
    """Represent a data section in a sparse file.

       In addition to offset and size, `realpos' is stored along with
       the section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1838
class _hole(_section):
    """Represent a hole section in a sparse file.

       A hole needs nothing beyond the offset and size kept by _section.
    """
1843
1844class _ringbuffer(list):
1845 """Ringbuffer class which increases performance
1846 over a regular list.
1847 """
1848 def __init__(self):
1849 self.idx = 0
1850 def find(self, offset):
1851 idx = self.idx
1852 while True:
1853 item = self[idx]
1854 if offset in item:
1855 break
1856 idx += 1
1857 if idx == len(self):
1858 idx = 0
1859 if idx == self.idx:
1860 # End of File
1861 return None
1862 self.idx = idx
1863 return item
1864
1865#---------------------------------------------
1866# zipfile compatible TarFile class
1867#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # `compression' selects between a plain and a gzipped archive.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            # Give every member the attribute names that zipfile's
            # ZipInfo objects use.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        # names of the members reported by infolist()
        return [m.name for m in self.infolist()]

    def infolist(self):
        # only members whose type is one of REGULAR_TYPES
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        # Nothing is tested here; None is returned.
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # `compress_type' is accepted but not used.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the ZipInfo style attributes into the ones that
        # addfile() reads.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
#class TarFileCompat
1913
1914#--------------------
1915# exported functions
1916#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened with this module's open() and closed again
       right away; a TarError raised on the way means False. Other
       exceptions are passed on to the caller.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1927
# Module level shortcut for TarFile.open.
# NOTE: from here on, the name `open' in this module refers to
# TarFile.open and no longer to the builtin open().
open = TarFile.open