blob: 6f44146920d080d342bd2e9edb3dc0eb01298131 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL        = "\0"               # the null character; terminates and pads
                                # the string fields of a header
BLOCKSIZE  = 512                # length of processing blocks (one header
                                # occupies exactly one block)
RECORDSIZE = BLOCKSIZE * 20     # length of records; TarFile.close() pads
                                # the archive to a multiple of this
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

LENGTH_NAME    = 100            # maximum length of a filename
LENGTH_LINK    = 100            # maximum length of a linkname
LENGTH_PREFIX  = 155            # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
                                                # (tested by TarInfo.isreg())

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
# They are consumed by filemode_table / filemode() below.
#---------------------------------------------------------
S_IFLNK = 0120000               # symbolic link
S_IFREG = 0100000               # regular file
S_IFBLK = 0060000               # block device
S_IFDIR = 0040000               # directory
S_IFCHR = 0020000               # character device
S_IFIFO = 0010000               # fifo

TSUID   = 04000                 # set UID on execution
TSGID   = 02000                 # set GID on execution
TSVTX   = 01000                 # reserved (shown as "t"/"T" by filemode())

TUREAD  = 0400                  # read by owner
TUWRITE = 0200                  # write by owner
TUEXEC  = 0100                  # execute/search by owner
TGREAD  = 0040                  # read by group
TGWRITE = 0020                  # write by group
TGEXEC  = 0010                  # execute/search by group
TOREAD  = 0004                  # read by other
TOWRITE = 0002                  # write by other
TOEXEC  = 0001                  # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Return the part of string buffer s that precedes its first NUL.

       A buffer that contains no NUL at all is returned unchanged.
    """
    end = s.find(NUL)
    if end < 0:
        return s
    return s[:end]
139
def calc_chksum(buf):
    """Return the checksum of a member's header block.

       buf is the 512 byte long string buffer holding the header. The
       checksum is the plain sum of all byte values, except that the
       eight bytes of the chksum field itself (offsets 148 to 155) are
       counted as if they were blanks.
    """
    blanks = 8 * ord(" ")                   # stand-in for the chksum field
    before = sum(map(ord, buf[:148]))       # everything ahead of chksum
    after = sum(map(ord, buf[156:]))        # everything behind chksum
    return blanks + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy data from file object src to file object dst.

       With length set to None everything src still has to offer is
       copied (delegated to shutil.copyfileobj). Otherwise exactly
       length bytes are transferred in chunks of 16 KiB; IOError is
       raised as soon as src delivers less than was asked for.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    chunk_size = 16 * 1024

    def transfer(count):
        # Move exactly count bytes or fail.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    full_chunks, tail = divmod(length, chunk_size)
    done = 0
    while done < full_chunks:
        transfer(chunk_size)
        done += 1

    if tail:
        transfer(tail)
    return
175
# Lookup table for filemode(). There is one row per character of the
# resulting string; a row is a flat sequence of (bits, character) pairs.
# filemode() emits the character of the FIRST pair whose bits are all set
# in the mode, or "-" if no pair matches. The most specific bit
# combination therefore has to come first in a row: execute+setuid must
# precede plain setuid and plain execute, otherwise "s"/"t" could never
# be produced because the plain execute bit would always match first.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD,  "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD,  "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD,  "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))
192
def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       Every row of filemode_table contributes one character: that of
       the first (bits, character) pair whose bits are all set in mode,
       or "-" when the row has no matching pair.
    """
    chars = []
    for row in filemode_table:
        symbol = "-"
        for i in range(0, len(row), 2):
            bits = row[i]
            if mode & bits == bits:
                symbol = row[i + 1]
                break
        chars.append(symbol)
    return "".join(chars)
210
# normpath() normalizes a path and always answers with forward slashes,
# whatever the separator of the local platform is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Root of the exception hierarchy of this module."""

class ExtractError(TarError):
    """Raised for general errors during extraction."""

class ReadError(TarError):
    """Raised when a tar archive is unreadable."""

class CompressionError(TarError):
    """Raised when a compression method is unavailable."""

class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.

       The object works directly on an OS level file descriptor
       (os.open/os.read/os.write), which is kept in self.fd.
    """

    def __init__(self, name, mode):
        """Open the file `name'. `mode' must be "r" (read only) or
           "w" (write only, create and truncate); anything else raises
           KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY only exists on platforms that distinguish text files.
        flags |= getattr(os, "O_BINARY", 0)
        # Pass the permission bits explicitly: os.open() defaults to
        # 0777, which would create every new archive as an executable.
        # 0x1B6 is octal 666 (rw-rw-rw-); the umask is still applied.
        self.fd = os.open(name, flags, 0x1B6)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string s to the descriptor."""
        # os.write() may accept only a part of the data (e.g. on pipes),
        # so keep going until everything has been handed over.
        while s:
            written = os.write(self.fd, s)
            s = s[written:]
258
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, type, fileobj, bufsize):
        """Construct a _Stream object.

           name    -- name of the file; it is opened with _LowLevelFile
                      if fileobj is None, and stored in the gzip header
                      when writing with type "gz".
           mode    -- "r" for reading, anything else is treated as
                      writing.
           type    -- "tar" (no compression), "gz" or "bz2".
           fileobj -- object to read from / write to, or None.
           bufsize -- size of the blocks exchanged with fileobj.

           CompressionError is raised if the module needed for the
           requested compression cannot be imported.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        self.name     = name or ""
        self.mode     = mode
        self.type     = type
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""          # raw data not yet written / consumed
        self.pos      = 0           # position in the uncompressed stream
        self.closed   = False
        self.cmp      = None        # (de)compressor, set up below

        try:
            if type == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("")
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if type == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Do not leak a file we opened ourselves, and keep __del__
            # from trying to finish a stream that never got started.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before it was set.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        # gzip header: magic, method (deflate), flags (FNAME), mtime,
        # extra flags, OS byte; followed by the NUL-terminated name.
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.type == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.type != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w":
            # Always drain the compressor, even if self.buf is empty:
            # a compressor may hold back everything it was given until
            # flush() is called.
            if self.type != "tar" and self.cmp is not None:
                self.buf += self.cmp.flush()

            if self.buf:
                self.__write("")    # Write remaining blocks to output
                self.fileobj.write(self.buf)
                self.buf = ""
                if self.type == "gz":
                    # gzip trailer: CRC32 and size modulo 2**32
                    self.fileobj.write(struct.pack("<l", self.crc))
                    size_mask = 4294967295      # 2**32 - 1
                    self.fileobj.write(struct.pack("<L",
                                                   self.pos & size_mask))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)              # skip mtime, extra flags and OS byte

        if flag & 4:
            # Extra field: part of the raw header, so it must be skipped
            # with __read(); read() would push it through the
            # decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)          # header CRC16

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Return the new position; raise StreamError if pos lies
           before the current position.
        """
        distance = pos - self.pos
        if distance < 0:
            raise StreamError("seeking backwards is not allowed")

        blocks, remainder = divmod(distance, self.bufsize)
        while blocks > 0:
            self.read(self.bufsize)
            blocks -= 1
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.type == "tar":
            return self.__read(size)

        # Decompress raw blocks until enough data has piled up; what is
        # left over is kept in self.dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
470
471#------------------------
472# Extraction file object
473#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up reading of the member described by `tarinfo' from
           the file object of `tarfile'.
        """
        self.fileobj = tarfile.fileobj
        self.name    = tarinfo.name
        self.mode    = "r"
        self.closed  = False
        self.offset  = tarinfo.offset_data   # start of the member's data
        self.size    = tarinfo.size
        self.pos     = 0                     # position inside the member
        self.linebuffer = ""                 # read-ahead of readline()
        # read() is bound per instance, depending on the member type.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           A line is returned with its terminating "\n"; carriage
           returns directly in front of it are removed. If the line is
           longer than a non-negative size, only its first size
           characters are returned and the rest is kept for the next
           call. The last line of the file is returned as it is if it
           lacks a line end.
        """
        limited = size >= 0

        nl = self.linebuffer.find("\n")
        while nl < 0:
            # Read ahead in small pieces, but never beyond the limit.
            want = 100
            if limited:
                want = min(size - len(self.linebuffer), want)
                if want <= 0:
                    break
            searched = len(self.linebuffer)
            buf = self.read(want)
            if not buf:
                break
            self.linebuffer += buf
            nl = self.linebuffer.find("\n", searched)

        if nl < 0 or (limited and nl >= size):
            # No line end within reach: hand out what is allowed and
            # keep everything else, so that no character gets lost.
            cut = len(self.linebuffer)
            if limited:
                cut = min(cut, size)
            s = self.linebuffer[:cut]
            self.linebuffer = self.linebuffer[cut:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Return at most size bytes; everything up to the end of the
           member if size is None or negative. Raise ValueError if the
           object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           Return at most size bytes; everything up to the end of the
           member if size is None or negative. Raise ValueError if the
           object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data section: its bytes are stored in the archive.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is delivered as a run of NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (from the start), 1 (from the current position)
           or 2 (from the end). The resulting position is clipped to
           the range 0..size. The read-ahead of readline() is dropped.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
616
617#------------------
618# Exported Classes
619#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name     = name       # member name (dirnames must end with '/')
        self.mode     = 0x1B6      # file permissions (octal 666, rw-rw-rw-)
        self.uid      = 0          # user id
        self.gid      = 0          # group id
        self.size     = 0          # file size
        self.mtime    = 0          # modification time
        self.chksum   = 0          # header checksum
        self.type     = REGTYPE    # member type
        self.linkname = ""         # link name
        self.uname    = "user"     # user name
        self.gname    = "group"    # group name
        self.devmajor = 0          #-
        self.devminor = 0          #-for use with CHRTYPE and BLKTYPE
        self.prefix   = ""         # prefix to filename or holding information
                                   # about sparse files

        self.offset   = 0          # the tar header starts here
        self.offset_data = 0       # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           The numeric fields are parsed as octal numbers; ValueError
           is raised if one of them (except the device numbers) cannot
           be parsed.
        """
        tarinfo = cls()
        tarinfo.name   = nts(buf[0:100])
        tarinfo.mode   = int(buf[100:108], 8)
        tarinfo.uid    = int(buf[108:116],8)
        tarinfo.gid    = int(buf[116:124],8)
        tarinfo.size   = long(buf[124:136], 8)
        tarinfo.mtime  = long(buf[136:148], 8)
        tarinfo.chksum = int(buf[148:156], 8)
        tarinfo.type   = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname  = nts(buf[265:297])
        tarinfo.gname  = nts(buf[297:329])
        try:
            tarinfo.devmajor = int(buf[329:337], 8)
            tarinfo.devminor = int(buf[337:345], 8)
        except ValueError:
            # Unparsable device fields: fall back to 0 for BOTH numbers.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        # (For GNU sparse members the field carries sparse information
        # instead and must not be joined with the name.)
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.

           The block is also stored in self.buf. String values that are
           longer than their header field are cut off at the field
           size, so that every field stays at its fixed offset.
        """
        name = self.name

        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (name, 100),
                ("%07o" % stat.S_IMODE(self.mode), 8),  # permission bits only
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                ("%011o" % self.size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),    # chksum placeholder, filled in below
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # Cut off what does not fit, pad the rest with NULs. Without
            # the cut an over-long value would shift all later fields.
            l = len(value)
            parts.append(value[:fieldsize] + (fieldsize - l) * NUL)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # Six octal digits plus NUL replace the first seven bytes of the
        # chksum field; its eighth byte stays a blank.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    # The following predicates test the member's type field.
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
746
747class TarFile(object):
748 """The TarFile Class provides an interface to tar archives.
749 """
750
    # Class-level defaults; they can be overridden per instance.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link. (gettarinfo()
                                # uses os.stat instead of os.lstat and does
                                # not record hardlinks when this is true.)

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    posix = True                # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)

    fileobject = ExFileObject   # class of the file-like objects used for
                                # reading members
767
768 def __init__(self, name=None, mode="r", fileobj=None):
769 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
770 read from an existing archive, 'a' to append data to an existing
771 file or 'w' to create a new file overwriting an existing one. `mode'
772 defaults to 'r'.
773 If `fileobj' is given, it is used for reading or writing data. If it
774 can be determined, `mode' is overridden by `fileobj's mode.
775 `fileobj' is not closed, when TarFile is closed.
776 """
777 self.name = name
778
779 if len(mode) > 1 or mode not in "raw":
780 raise ValueError, "mode must be 'r', 'a' or 'w'"
781 self._mode = mode
782 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
783
784 if not fileobj:
785 fileobj = file(self.name, self.mode)
786 self._extfileobj = False
787 else:
788 if self.name is None and hasattr(fileobj, "name"):
789 self.name = fileobj.name
790 if hasattr(fileobj, "mode"):
791 self.mode = fileobj.mode
792 self._extfileobj = True
793 self.fileobj = fileobj
794
795 # Init datastructures
796 self.closed = False
797 self.members = [] # list of members as TarInfo objects
798 self.membernames = [] # names of members
799 self.chunks = [0] # chunk cache
800 self._loaded = False # flag if all members have been read
801 self.offset = 0L # current position in the archive file
802 self.inodes = {} # dictionary caching the inodes of
803 # archive members already added
804
805 if self._mode == "r":
806 self.firstmember = None
807 self.firstmember = self.next()
808
809 if self._mode == "a":
810 # Move to the end of the archive,
811 # before the first empty block.
812 self.firstmember = None
813 while True:
814 try:
815 tarinfo = self.next()
816 except ReadError:
817 self.fileobj.seek(0)
818 break
819 if tarinfo is None:
820 self.fileobj.seek(- BLOCKSIZE, 1)
821 break
822
823 if self._mode in "aw":
824 self._loaded = True
825
826 #--------------------------------------------------------------------------
827 # Below are the classmethods which act as alternate constructors to the
828 # TarFile class. The open() method is the only one that is needed for
829 # public use; it is the "super"-constructor and is able to select an
830 # adequate "sub"-constructor for a particular compression using the mapping
831 # from OPEN_METH.
832 #
833 # This concept allows one to subclass TarFile without losing the comfort of
834 # the super-constructor. A sub-constructor is registered and made available
835 # by adding it to the mapping in OPEN_METH.
836
837 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
838 """Open a tar archive for reading, writing or appending. Return
839 an appropriate TarFile class.
840
841 mode:
842 'r' open for reading with transparent compression
843 'r:' open for reading exclusively uncompressed
844 'r:gz' open for reading with gzip compression
845 'r:bz2' open for reading with bzip2 compression
846 'a' or 'a:' open for appending
847 'w' or 'w:' open for writing without compression
848 'w:gz' open for writing with gzip compression
849 'w:bz2' open for writing with bzip2 compression
850 'r|' open an uncompressed stream of tar blocks for reading
851 'r|gz' open a gzip compressed stream of tar blocks
852 'r|bz2' open a bzip2 compressed stream of tar blocks
853 'w|' open an uncompressed stream for writing
854 'w|gz' open a gzip compressed stream for writing
855 'w|bz2' open a bzip2 compressed stream for writing
856 """
857
858 if not name and not fileobj:
859 raise ValueError, "nothing to open"
860
861 if ":" in mode:
862 filemode, comptype = mode.split(":", 1)
863 filemode = filemode or "r"
864 comptype = comptype or "tar"
865
866 # Select the *open() function according to
867 # given compression.
868 if comptype in cls.OPEN_METH:
869 func = getattr(cls, cls.OPEN_METH[comptype])
870 else:
871 raise CompressionError, "unknown compression type %r" % comptype
872 return func(name, filemode, fileobj)
873
874 elif "|" in mode:
875 filemode, comptype = mode.split("|", 1)
876 filemode = filemode or "r"
877 comptype = comptype or "tar"
878
879 if filemode not in "rw":
880 raise ValueError, "mode must be 'r' or 'w'"
881
882 t = cls(name, filemode,
883 _Stream(name, filemode, comptype, fileobj, bufsize))
884 t._extfileobj = False
885 return t
886
887 elif mode == "r":
888 # Find out which *open() is appropriate for opening the file.
889 for comptype in cls.OPEN_METH:
890 func = getattr(cls, cls.OPEN_METH[comptype])
891 try:
892 return func(name, "r", fileobj)
893 except (ReadError, CompressionError):
894 continue
895 raise ReadError, "file could not be opened successfully"
896
897 elif mode in "aw":
898 return cls.taropen(name, mode, fileobj)
899
900 raise ValueError, "undiscernible mode"
901
902 open = classmethod(open)
903
904 def taropen(cls, name, mode="r", fileobj=None):
905 """Open uncompressed tar archive name for reading or writing.
906 """
907 if len(mode) > 1 or mode not in "raw":
908 raise ValueError, "mode must be 'r', 'a' or 'w'"
909 return cls(name, mode, fileobj)
910
911 taropen = classmethod(taropen)
912
913 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
914 """Open gzip compressed tar archive name for reading or writing.
915 Appending is not allowed.
916 """
917 if len(mode) > 1 or mode not in "rw":
918 raise ValueError, "mode must be 'r' or 'w'"
919
920 try:
921 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000922 gzip.GzipFile
923 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000924 raise CompressionError, "gzip module is not available"
925
926 pre, ext = os.path.splitext(name)
927 pre = os.path.basename(pre)
928 if ext == ".tgz":
929 ext = ".tar"
930 if ext == ".gz":
931 ext = ""
932 tarname = pre + ext
933
934 if fileobj is None:
935 fileobj = file(name, mode + "b")
936
937 if mode != "r":
938 name = tarname
939
940 try:
941 t = cls.taropen(tarname, mode,
942 gzip.GzipFile(name, mode, compresslevel, fileobj)
943 )
944 except IOError:
945 raise ReadError, "not a gzip file"
946 t._extfileobj = False
947 return t
948
949 gzopen = classmethod(gzopen)
950
951 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
952 """Open bzip2 compressed tar archive name for reading or writing.
953 Appending is not allowed.
954 """
955 if len(mode) > 1 or mode not in "rw":
956 raise ValueError, "mode must be 'r' or 'w'."
957
958 try:
959 import bz2
960 except ImportError:
961 raise CompressionError, "bz2 module is not available"
962
963 pre, ext = os.path.splitext(name)
964 pre = os.path.basename(pre)
965 if ext == ".tbz2":
966 ext = ".tar"
967 if ext == ".bz2":
968 ext = ""
969 tarname = pre + ext
970
971 if fileobj is not None:
972 raise ValueError, "no support for external file objects"
973
974 try:
975 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
976 except IOError:
977 raise ReadError, "not a bzip2 file"
978 t._extfileobj = False
979 return t
980
981 bz2open = classmethod(bz2open)
982
    # All *open() methods are registered here.
    # The keys are the compression types accepted in the mode string of
    # open(), the values are the NAMES of the classmethods that open an
    # archive of that type; open() resolves them with getattr().
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
989
990 #--------------------------------------------------------------------------
991 # The public methods which TarFile provides:
992
993 def close(self):
994 """Close the TarFile. In write-mode, two finishing zero blocks are
995 appended to the archive.
996 """
997 if self.closed:
998 return
999
1000 if self._mode in "aw":
1001 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1002 self.offset += (BLOCKSIZE * 2)
1003 # fill up the end with zero-blocks
1004 # (like option -b20 for tar does)
1005 blocks, remainder = divmod(self.offset, RECORDSIZE)
1006 if remainder > 0:
1007 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1008
1009 if not self._extfileobj:
1010 self.fileobj.close()
1011 self.closed = True
1012
1013 def getmember(self, name):
1014 """Return a TarInfo object for member `name'. If `name' can not be
1015 found in the archive, KeyError is raised. If a member occurs more
1016 than once in the archive, its last occurence is assumed to be the
1017 most up-to-date version.
1018 """
1019 self._check()
1020 if name not in self.membernames and not self._loaded:
1021 self._load()
1022 if name not in self.membernames:
1023 raise KeyError, "filename %r not found" % name
1024 return self._getmember(name)
1025
1026 def getmembers(self):
1027 """Return the members of the archive as a list of TarInfo objects. The
1028 list has the same order as the members in the archive.
1029 """
1030 self._check()
1031 if not self._loaded: # if we want to obtain a list of
1032 self._load() # all members, we first have to
1033 # scan the whole archive.
1034 return self.members
1035
1036 def getnames(self):
1037 """Return the members of the archive as a list of their names. It has
1038 the same order as the list returned by getmembers().
1039 """
1040 self._check()
1041 if not self._loaded:
1042 self._load()
1043 return self.membernames
1044
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Return None if the file is neither a regular file, a directory, a
       fifo, a symbolic link, a character device nor a block device.

       Only regular files carry data in the archive, so the size of every
       other member type (including a hard link to a file that was
       archived before) is recorded as 0.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    linkname = ""
    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and inode in self.inodes \
           and self.inodes[inode] != arcname:
            # It is a hardlink to an already archived file. A file that
            # is added a second time under the very same name is stored
            # as a regular file again; a link pointing to itself could
            # not be extracted.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo = TarInfo()
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    if ftype == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        # No data blocks follow the header of these member types, so
        # the st_size reported by the file system must not be stored.
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1140
def list(self, verbose=True):
    """Print a table of contents to sys.stdout.

       If `verbose' is False, only the names of the members are printed,
       one per line. If it is True, an `ls -l'-like line is produced for
       every member: mode, owner/group, size (or major,minor for
       devices), modification time, name and link target.
    """
    self._check()

    for tarinfo in self:
        # Collect the columns of one output line, then print them
        # separated by single spaces.
        columns = []
        if verbose:
            columns.append(filemode(tarinfo.mode))
            columns.append("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                      tarinfo.gname or tarinfo.gid))
            if tarinfo.ischr() or tarinfo.isblk():
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                columns.append("%10s" % device)
            else:
                columns.append("%10d" % tarinfo.size)
            columns.append("%d-%02d-%02d %02d:%02d:%02d"
                           % time.localtime(tarinfo.mtime)[:6])

        columns.append(tarinfo.name)

        if verbose:
            if tarinfo.issym():
                columns.extend(["->", tarinfo.linkname])
            if tarinfo.islnk():
                columns.extend(["link to", tarinfo.linkname])

        print(" ".join(columns))
1169
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive itself and files of an unsupported type are skipped
       with a debug message. Members that carry no data (links, fifos,
       devices and directories) are stored with a size of 0.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # `open' is rebound to TarFile.open at module level, hence file().
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Do not leak the descriptor if writing the member fails.
            f.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        # A directory header is not followed by data blocks, so the
        # st_size reported by the file system must not end up in it.
        tarinfo.size = 0
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))
1222
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       `tarinfo' is modified in place: its name and linkname are
       normalized and, if they are too long for the header, shortened.
       Raises ValueError if the member is larger than MAXSIZE_MEMBER or,
       in posix mode, if a name or linkname does not fit into the header.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError("file is too large (>8GB)")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # Store the complete linkname in a GNU extension member and
        # keep a shortened copy in the regular header.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if not self.posix:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
        else:
            # Split the name at the last slash within the first
            # LENGTH_PREFIX + 1 characters; the part in front of that
            # slash goes to the prefix field.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            prefix = head[:head.rfind("/") + 1]

            name = tarinfo.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Fill the last data block up with NULs.
        remainder = tarinfo.size % BLOCKSIZE
        written = tarinfo.size - remainder
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            written += BLOCKSIZE
        self.offset += written

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
1287
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.

       An EnvironmentError is re-raised only if self.errorlevel is greater
       than 0, an ExtractError only if it is greater than 1. Otherwise the
       error is merely reported as a debug message.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        err = sys.exc_info()[1]
        self._dbg(1, "tarfile: %s" % err)
1316
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()

       Raises StreamError if a (sym)link is requested while the archive
       is read from a _Stream object.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # A member of an unknown type is treated like a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object. The target
    # is looked up among the members in front of the link.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1355
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.

       Missing upper directories are created first on a best-effort
       basis. After the member has been created by the matching make*()
       method, its owner is set and, unless it is a symbolic link, also
       its mode and modification time.
    """
    # Build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        ti.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO    # rwxrwxrwx
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        try:
            self._extract_member(ti, ti.name)
        except (EnvironmentError, ExtractError):
            # Best effort only: if the directory really is missing, the
            # extraction of the member itself fails below. Anything else
            # (e.g. KeyboardInterrupt) is deliberately not swallowed.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1408
#--------------------------------------------------------------------------
# Below are the different file methods. They are called via
# _extract_member() when extract() is called. They can be replaced in a
# subclass to implement other functionality.
1413
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

       A directory that exists already is not an error; every other
       EnvironmentError raised by os.mkdir() is passed on.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        err = sys.exc_info()[1]
        if err.errno == errno.EEXIST:
            return
        raise
1422
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The data is copied from the file object that extractfile() returns
       for `tarinfo'. Both file objects are closed again, even if the
       copy fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # `open' is rebound to TarFile.open at module level, hence file().
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1431
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath.

       The member is written like a regular file via makefile();
       afterwards a debug message (level 1) reports the unknown type.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
1439
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

       Raises ExtractError if the os module provides no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1447
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

       The device numbers are taken from tarinfo.devmajor and
       tarinfo.devminor. Raises ExtractError if the os module lacks
       mknod() or makedev().
    """
    for required in ("mknod", "makedev"):
        if not hasattr(os, required):
            raise ExtractError("special devices not supported by system")

    # Combine the stored mode with the file type bit of the device.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    mode = tarinfo.mode | devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1462
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       The copy is taken from the archive member the link refers to or,
       if that fails, from the file system. Raises IOError if neither
       works.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            os.link(linkpath, targetpath)
        return
    except AttributeError:
        # Raised e.g. when the os module lacks symlink() or link();
        # fall back to copying below.
        pass

    if tarinfo.issym():
        # The target of a symbolic link is relative to the directory
        # of the link itself.
        linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                         linkpath))

    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        try:
            shutil.copy2(os.path.normpath(linkpath), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1488
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by numeric id; if both lookups fail, the id of the
       current process is used. Raises ExtractError if the owner cannot
       be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1516
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       Nothing is done if the os module provides no chmod(). Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001525
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       Access and modification time are both set to tarinfo.mtime.
       Nothing is done if the os module provides no utime(), or for
       directories on win32. Raises ExtractError if the time cannot be
       changed.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    skip = not hasattr(os, 'utime') \
           or (sys.platform == "win32" and tarinfo.isdir())
    if skip:
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1539
1540 #--------------------------------------------------------------------------
1541
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member that is read is appended to self.members and
       self.membernames. Raises ReadError if the very first block of the
       archive cannot be interpreted as a tar header.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.chunks[-1])
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the block and go on with the following one.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            # Block is empty or unreadable.
            if self.chunks[-1] == 0:
                # If the first block is invalid. That does not
                # look like a tar archive we can handle.
                raise ReadError("empty, unreadable or compressed file")
            return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
    else:
        tarinfo.offset_data = self.offset
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and mark a directory
    # only by a trailing slash in the name of a regular member. The
    # last character has to be tested for that (name[-1:]); the former
    # name[:-1] compared everything but the last character.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
    return tarinfo
1610
#--------------------------------------------------------------------------
# Below are some methods which are called for special typeflags in the
# next() method, e.g. for unwrapping GNU longname/longlink blocks. They
# are registered in TYPE_METH below. You can register your own methods
# with this mapping.
# A registered method is called with a TarInfo object as its only argument.
#
# During its execution the method MUST perform the following tasks:
# 1. set tarinfo.offset_data to the position where the data blocks begin,
#    if there is data to follow.
# 2. set self.offset to the position where the next member's header will
#    begin.
# 3. return a valid TarInfo object.
1624
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member.

       `tarinfo' is the header of the extension member. Its data blocks
       hold the long name, and the header that follows them describes the
       actual member. The TarInfo object of that member is returned, with
       its name or linkname replaced by the long one.
    """
    # Read the data blocks of the extension member.
    pieces = []
    remaining = tarinfo.size
    while remaining > 0:
        pieces.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longdata = "".join(pieces)

    name = None
    linkname = None
    if tarinfo.type == GNUTYPE_LONGNAME:
        name = nts(longdata)
    if tarinfo.type == GNUTYPE_LONGLINK:
        linkname = nts(longdata)

    # The next block is the header of the member the name belongs to.
    header = self.fileobj.read(BLOCKSIZE)

    member = TarInfo.frombuf(header)
    member.offset = self.offset
    self.offset += BLOCKSIZE
    member.offset_data = self.offset
    member.name = name or member.name
    member.linkname = linkname or member.linkname

    if member.isreg() or member.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(member.size)
    return member
1657
def _sparse_structs(self, buf, pos, count, sp, lastpos, realpos):
    """Parse up to `count' sparse structs of 24 bytes each from `buf',
       starting at index `pos', and append the matching _hole and _data
       sections to `sp'. Parsing stops at the first struct whose fields
       are not octal numbers. Return the updated (lastpos, realpos).
    """
    for i in range(count):
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            break
        if offset > lastpos:
            # The gap in front of this data section is a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24
    return lastpos, realpos

def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

       The map of data and hole sections is stored as tarinfo.sparse.
       tarinfo.offset_data is set to the position of the data blocks and
       tarinfo.size is replaced by the original size of the file as
       recorded in the header. The modified `tarinfo' is returned.
    """
    buf = tarinfo.tobuf()
    sp = _ringbuffer()

    # There are 4 possible sparse structs in the
    # first header, starting at index 386.
    lastpos, realpos = self._sparse_structs(buf, 386, 4, sp, 0, 0)

    isextended = ord(buf[482])
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        # An extra header holds up to 21 structs followed by its
        # own isextended flag.
        lastpos, realpos = self._sparse_structs(buf, 0, 21, sp,
                                                lastpos, realpos)
        isextended = ord(buf[504])

    if lastpos < origsize:
        # The file ends with a hole.
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    # tarinfo.size still is the number of bytes stored in the archive.
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize
    return tarinfo
1713
# The type mapping for the next() method. The keys are single character
# strings, the typeflag. The values are methods which are called when
# next() encounters such a typeflag.
# The values are stored as plain functions, so next() passes the TarFile
# explicitly: self.TYPE_METH[tarinfo.type](self, tarinfo).
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE: proc_sparse
}
1722
1723 #--------------------------------------------------------------------------
1724 # Little helper methods:
1725
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
       e.g. _block(834) => 1024.

       A count that is a multiple of BLOCKSIZE already is returned
       unchanged.
    """
    whole, rest = divmod(count, BLOCKSIZE)
    if not rest:
        return whole * BLOCKSIZE
    # A partially filled block counts as a whole one.
    return (whole + 1) * BLOCKSIZE
1734
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.
       If tarinfo is given, it is used as the starting point.

       Only members in front of `tarinfo' are considered then. Return
       the matching TarInfo object, or None if there is none.
    """
    if tarinfo is not None:
        index = self.members.index(tarinfo)
    else:
        index = len(self.members)

    # Walk backwards, so that the last occurrence of a name wins.
    while index > 0:
        index -= 1
        if name == self.membernames[index]:
            return self.members[index]
    return None
1747
def _load(self):
    """Read through the entire archive file and look for readable
       members.

       next() is called until it returns None; after that the TarFile
       is marked as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1757
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode.

       `mode' is a string of the mode characters that are allowed for the
       operation, or None if every mode is fine. Raises IOError if the
       TarFile is closed or if its mode is not among the allowed ones.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
1766
def __iter__(self):
    """Provide an iterator object.

       As long as the archive has not been read completely, a TarIter
       object is returned, which reads the members one by one. Afterwards
       an iterator over the list of members is returned.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1774
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       It consists of an extended tar header, with the length
       of the longname as size, followed by data blocks,
       which contain the longname as a null terminated string.

       `name' is the long name or linkname, `type' the typeflag of the
       extension member (GNUTYPE_LONGNAME or GNUTYPE_LONGLINK).
       self.offset is advanced by the header block and by the data
       blocks that are written.
    """
    # Terminate the name explicitly. Without that, a name whose length
    # is a multiple of BLOCKSIZE would be stored without terminator,
    # because no padding follows it.
    name += NUL

    tarinfo = TarInfo()
    tarinfo.name = "././@LongLink"
    tarinfo.type = type
    tarinfo.mode = 0
    tarinfo.size = len(name)

    # write extended header
    self.fileobj.write(tarinfo.tobuf())
    # The header occupies a block of its own, which has to be counted.
    self.offset += BLOCKSIZE

    # write name blocks
    self.fileobj.write(name)
    blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
    if remainder > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1796
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       `msg' is written, followed by a newline, if `level' does not
       exceed self.debug.
    """
    if level > self.debug:
        return
    sys.stderr.write(str(msg) + "\n")
1802# class TarFile
1803
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object that reads from `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return iterator object, i.e. the TarIter itself.
        """
        return self

    def next(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded
           and raise StopIteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1828
1829# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.

       A section covers `size' bytes beginning at `offset'; `pos in
       section' tells whether the position pos lies inside of it.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The end of the section is exclusive.
        start = self.offset
        return start <= offset < start + self.size
1838
class _data(_section):
    """Represent a data section in a sparse file.

       In addition to offset and size, the attribute `realpos' of the
       section is stored as passed to the constructor.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1845
class _hole(_section):
    """Represent a hole section in a sparse file.

       A hole has no attributes beyond the offset and size of _section.
    """
1850
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts its search at the item that was found last and
       wraps around at the end of the list.
    """
    def __init__(self):
        # Index of the item that was found by the last call of find().
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains `offset', searching from
           the item found last. Return None if no item contains it,
           which includes the case of an empty buffer.
        """
        if not self:
            # Nothing to search; indexing below would raise IndexError.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
1871
1872#---------------------------------------------
1873# zipfile compatible TarFile class
1874#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED

class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.

       The wrapped TarFile object is available as the attribute
       `tarfile'.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open the archive `file' with the given `mode'. `compression'
           is TAR_PLAIN or TAR_GZIPPED; any other value raises ValueError.
           When reading, every member is given the attributes filename,
           file_size and date_time, which mirror name, size and mtime.
        """
        if compression not in (TAR_PLAIN, TAR_GZIPPED):
            raise ValueError("unknown compression constant")
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        else:
            self.tarfile = TarFile.gzopen(file, mode)

        if mode[0:1] == "r":
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of the members that infolist() returns."""
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents using TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return None

    def getinfo(self, name):
        """Return the TarInfo object of the member `name'."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member `name'."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive, optionally as
           `arcname'. `compress_type' is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' to the archive, described by `zinfo'.
           The attributes name, size and mtime are set on `zinfo' from
           its filename, file_size and date_time before it is passed to
           TarFile.addfile().
        """
        import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        data = StringIO.StringIO(bytes)
        self.tarfile.addfile(zinfo, data)

    def close(self):
        """Close the wrapped TarFile."""
        self.tarfile.close()
1919#class TarFileCompat
1920
1921#--------------------
1922# exported functions
1923#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened with this module's open() and closed again
       right away. Only TarError is taken as "not a tar archive"; other
       exceptions are passed on to the caller.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1934
# Module-level alias for TarFile.open. Inside this module the name `open'
# therefore no longer refers to the builtin open().
open = TarFile.open