blob: af733deb3233016689260f58acc4ce48720553c9 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID = 04000            # set UID on execution
TSGID = 02000            # set GID on execution
TSVTX = 01000            # reserved

# Ordinary permission bits for owner, group and others.
TUREAD = 0400            # read by owner
TUWRITE = 0200           # write by owner
TUEXEC = 0100            # execute/search by owner
TGREAD = 0040            # read by group
TGWRITE = 0020           # write by group
TGEXEC = 0010            # execute/search by group
TOREAD = 0004            # read by other
TOWRITE = 0002           # write by other
TOEXEC = 0001            # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Return the part of the string buffer `s' that precedes its first
       NUL character. A buffer without any NUL is returned unchanged.
    """
    end = s.find(NUL)
    if end < 0:
        return s
    return s[:end]
139
def calc_chksum(buf):
    """Return the checksum of a member's header block.

       buf is the string buffer holding the header (512 bytes long).
       The checksum is the plain sum of all byte values, where the
       8 bytes of the chksum field itself (offsets 148 to 155) are
       counted as if they were spaces.
    """
    blank_field = 8 * ord(" ")
    before = sum(map(ord, buf[:148]))
    after = sum(map(ord, buf[156:]))
    return blank_field + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy `length' bytes from the file object `src' to the file object
       `dst'. If `length' is None, everything up to the end of `src' is
       copied. IOError is raised if `src' ends before `length' bytes
       could be read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    # First all the complete buffers ...
    done = 0
    while done < full_chunks:
        data = src.read(BUFSIZE)
        if len(data) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(data)
        done += 1

    # ... then whatever is left over.
    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
175
# Lookup table for filemode(). Every row produces one character of the
# result. A row is a flat sequence of (bitmask, character) pairs which
# are tried from left to right; the first mask that is completely set
# in the mode wins, and "-" is used if none matches.
# Masks made up of several bits must therefore come before the masks
# they contain: with TUEXEC listed first, a mode that has both TUEXEC
# and TSUID set would always be shown as "x" and never as "s".
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))
192
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       One character is produced per row of filemode_table: the
       character belonging to the first bitmask of the row that is
       completely set in `mode', or "-" if no bitmask matches.
    """
    chars = []
    for row in filemode_table:
        symbol = "-"
        # A row is a flat sequence: mask, char, mask, char, ...
        for i in range(0, len(row), 2):
            mask = row[i]
            if mode & mask == mask:
                symbol = row[i + 1]
                break
        chars.append(symbol)
    return "".join(chars)
210
# normpath() normalizes a pathname and always uses "/" as separator in
# its result, whatever the separator of the local platform is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Base class of all exceptions defined in this module."""

class ExtractError(TarError):
    """General exception for errors during extraction."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for operations that are not supported on
       stream-like TarFiles.
    """
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object. The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise. Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, type, fileobj, bufsize):
        """Construct a _Stream object.

           name    -- name of the file; it is opened with _LowLevelFile
                      if `fileobj' is None
           mode    -- "r" for reading; every other value is treated as
                      writing when the compression is set up
           type    -- "tar" (no compression), "gz" or "bz2"
           fileobj -- object to read from or write to; it is not closed
                      by close() if it was passed in by the caller
           bufsize -- size of the blocks that are read from or written
                      to `fileobj'

           CompressionError is raised if the module needed for `type'
           cannot be imported.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        self.name = name or ""
        self.mode = mode
        self.type = type
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""           # raw data not yet written / not yet consumed
        self.pos = 0            # position in the uncompressed data
        self.closed = False

        if type == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if type == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # `closed' does not exist yet if __init__() failed while opening
        # the file, and there is nothing to close in that case.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        # gzip header: magic, deflate method, FNAME flag, mtime,
        # extra flags, OS byte; followed by the NUL-terminated name.
        timestamp = struct.pack("<L", long(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.type == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.type != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Fetch the rest of the compressed data first, so that it is
        # written even if the buffer happens to be empty right now.
        if self.mode == "w" and self.type != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.type == "gz":
                # gzip trailer: CRC32 and the size of the uncompressed
                # data, the latter modulo 2**32.
                self.fileobj.write(struct.pack("<l", self.crc))
                self.fileobj.write(struct.pack("<L", self.pos % (2 ** 32)))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field. It is part of the uncompressed header, so it
            # has to be skipped with the raw __read(); read() would feed
            # it to the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Header CRC.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seeking forward is done by reading and discarding data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.type == "tar":
            return self.__read(size)

        # Decompress raw blocks until enough data is available;
        # whatever exceeds `size' is kept in self.dbuf.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Construct a file object for the member `tarinfo' whose data
           is read from `tarfile.fileobj'.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        # read() is bound per instance, depending on the member type.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           A complete line is returned with a single "\\n" at its end;
           carriage returns directly in front of the newline are
           removed. If there is no newline within `size' characters
           (or before the end of the file), at most `size' characters
           are returned as they are, without a newline.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        budget = size - len(self.linebuffer)
        # Fill the buffer in small pieces until it contains a newline,
        # the size limit is reached or the data is exhausted.
        while nl < 0 and budget > 0:
            chunk = self.read(min(budget, 100))
            if not chunk:
                break
            self.linebuffer += chunk
            budget -= len(chunk)
            nl = self.linebuffer.find("\n")

        if nl < 0 or nl >= size:
            # No newline within the limit: return the data unchanged
            # and keep everything beyond the limit for the next call.
            line = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return line

        line = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while line[-1:] == "\r":
            line = line[:-1]
        return line + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files. If `size' is None or
           negative, everything up to the end of the member is read.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files. If `size' is None or
           negative, everything up to the end of the member is read.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data section: read it from its real position in the
            # archive.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is returned as NUL characters.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. `whence' is 0 (from the
           start), 1 (relative to the current position) or 2 (from the
           end). The resulting position is kept between 0 and the size
           of the member. The line buffer is discarded in any case.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
615
616#------------------
617# Exported Classes
618#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name            # member name (dirnames must end with '/')
        self.mode = int("666", 8)   # file permissions (rw-rw-rw-)
        self.uid = 0                # user id
        self.gid = 0                # group id
        self.size = 0               # file size
        self.mtime = 0              # modification time
        self.chksum = 0             # header checksum
        self.type = REGTYPE         # member type
        self.linkname = ""          # link name
        self.uname = "user"         # user name
        self.gname = "group"        # group name
        self.devmajor = 0           #-
        self.devminor = 0           #-for use with CHRTYPE and BLKTYPE
        self.prefix = ""            # prefix to filename or holding information
                                    # about sparse files

        self.offset = 0             # the tar header starts here
        self.offset_data = 0        # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,
                                   self.name, id(self))

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.
           ValueError is raised if one of the mandatory numeric fields
           cannot be converted.
        """
        tarinfo = cls()
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = int(buf[100:108], 8)
        tarinfo.uid = int(buf[108:116], 8)
        tarinfo.gid = int(buf[116:124], 8)
        tarinfo.size = long(buf[124:136], 8)
        tarinfo.mtime = long(buf[136:148], 8)
        tarinfo.chksum = int(buf[148:156], 8)
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        try:
            tarinfo.devmajor = int(buf[329:337], 8)
            tarinfo.devminor = int(buf[337:345], 8)
        except ValueError:
            # Fall back to 0 for both numbers; devmajor may already
            # have been set before the conversion of devminor failed.
            tarinfo.devmajor = tarinfo.devminor = 0

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + "/" + name
        prefix = buf[345:500].rstrip(NUL)
        if NUL not in prefix:
            tarinfo.prefix = prefix
            tarinfo.name = normpath(os.path.join(tarinfo.prefix, tarinfo.name))
        else:
            # NULs inside the field: it does not hold a pathname, so
            # the raw field is kept and the name is left alone.
            tarinfo.prefix = buf[345:500]

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.
           The block is also stored in the attribute `buf'.
        """
        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (self.name, 100),
                ("%07o" % (self.mode & int("7777", 8)), 8),
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                ("%011o" % self.size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # Pad short values with NULs and clip values that are too
            # long, so that all following fields stay at their offsets.
            l = len(value)
            parts.append(value[:fieldsize] + (fieldsize - l) * NUL)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # The checksum is stored as six octal digits and a NUL; the
        # eighth byte of the field remains a space.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
750
751class TarFile(object):
752 """The TarFile Class provides an interface to tar archives.
753 """
754
755 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
756
757 dereference = False # If true, add content of linked file to the
758 # tar file, else the link.
759
760 ignore_zeros = False # If true, skips empty or invalid blocks and
761 # continues processing.
762
763 errorlevel = 0 # If 0, fatal errors only appear in debug
764 # messages (if debug >= 0). If > 0, errors
765 # are passed to the caller as exceptions.
766
767 posix = True # If True, generates POSIX.1-1990-compliant
768 # archives (no GNU extensions!)
769
770 fileobject = ExFileObject
771
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.
       ValueError is raised for any other `mode'.
    """
    self.name = name

    # Compare with the complete mode strings; a substring test would
    # let the empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(self.name, self.mode)
        self._extfileobj = False
    else:
        if self.name is None and hasattr(fileobj, "name"):
            self.name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.fileobj = fileobj

    # Init datastructures
    self.closed = False
    self.members = []           # list of members as TarInfo objects
    self.membernames = []       # names of members
    self.chunks = [0]           # chunk cache
    self._loaded = False        # flag if all members have been read
    self.offset = 0             # current position in the archive file
    self.inodes = {}            # dictionary caching the inodes of
                                # archive members already added

    if self._mode == "r":
        self.firstmember = None
        self.firstmember = self.next()

    if self._mode == "a":
        # Move to the end of the archive,
        # before the first empty block.
        self.firstmember = None
        while True:
            try:
                tarinfo = self.next()
            except ReadError:
                self.fileobj.seek(0)
                break
            if tarinfo is None:
                self.fileobj.seek(- BLOCKSIZE, 1)
                break

    if self._mode in ("a", "w"):
        self._loaded = True
829
830 #--------------------------------------------------------------------------
831 # Below are the classmethods which act as alternate constructors to the
832 # TarFile class. The open() method is the only one that is needed for
833 # public use; it is the "super"-constructor and is able to select an
834 # adequate "sub"-constructor for a particular compression using the mapping
835 # from OPEN_METH.
836 #
837 # This concept allows one to subclass TarFile without losing the comfort of
838 # the super-constructor. A sub-constructor is registered and made available
839 # by adding it to the mapping in OPEN_METH.
840
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r'          open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       ValueError is raised if neither `name' nor `fileobj' is given or
       if `mode' is not understood, CompressionError for an unknown
       compression type and ReadError if no method is able to read the
       archive in mode 'r'.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if ":" in mode:
        basemode, comptype = mode.split(":", 1)
        basemode = basemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        func = getattr(cls, cls.OPEN_METH[comptype])
        return func(name, basemode, fileobj)

    elif "|" in mode:
        basemode, comptype = mode.split("|", 1)
        basemode = basemode or "r"
        comptype = comptype or "tar"

        if basemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, basemode,
                _Stream(name, basemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    elif mode == "r":
        # Find out which *open() is appropriate for opening the file.
        # A failed attempt may already have consumed data of an external
        # file object, so its position is restored before the next try.
        rewind = (fileobj is not None and hasattr(fileobj, "tell")
                  and hasattr(fileobj, "seek"))
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if rewind:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if rewind:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")

open = classmethod(open)
907
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.
       `mode' must be 'r', 'a' or 'w', otherwise ValueError is raised.
    """
    # Compare with the complete mode strings; a substring test would
    # let the empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)

taropen = classmethod(taropen)
916
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.
       ValueError is raised for a `mode' other than 'r' or 'w',
       CompressionError if the gzip module cannot be used and
       ReadError if opening the archive fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the tar archive inside the gzip file:
    # "x.tgz" becomes "x.tar", "x.tar.gz" becomes "x.tar". There is
    # nothing to derive if only a file object was given.
    if name:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext
    else:
        tarname = name

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        # Do not leave behind a file that was opened here.
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    t._extfileobj = False
    return t

gzopen = classmethod(gzopen)
954
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.
       ValueError is raised for a `mode' other than 'r' or 'w' and if
       `fileobj' is given, CompressionError if the bz2 module is not
       available and ReadError if opening the archive fails with an
       IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Check this before `name' is used: it may be None if the caller
    # passed a file object only.
    if fileobj is not None:
        raise ValueError("no support for external file objects")

    # Derive the name of the tar archive inside the bzip2 file:
    # "x.tbz2" becomes "x.tar", "x.tar.bz2" becomes "x.tar".
    pre, ext = os.path.splitext(name)
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    try:
        t = cls.taropen(tarname, mode,
                        bz2.BZ2File(name, mode, compresslevel=compresslevel))
    except IOError:
        raise ReadError("not a bzip2 file")
    t._extfileobj = False
    return t

bz2open = classmethod(bz2open)
986
# All *open() methods are registered here. The keys are the compression
# types used in the mode strings of open(), the values are the names of
# the classmethods that open an archive of that type.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
993
994 #--------------------------------------------------------------------------
995 # The public methods which TarFile provides:
996
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.
       The TarFile counts as closed, and a file that was opened by the
       TarFile itself is closed, even if writing these blocks fails.
    """
    if self.closed:
        return

    try:
        if self._mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1015 self.closed = True
1016
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    self._check()
    if name not in self.membernames:
        # The name may belong to a member that has not been read yet.
        if not self._loaded:
            self._load()
        if name not in self.membernames:
            raise KeyError("filename %r not found" % name)
    return self._getmember(name)
1029
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been scanned.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1039
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    self._check()
    # All names are known only after the whole archive has been scanned.
    if self._loaded:
        return self.membernames
    self._load()
    return self.membernames
1048
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.
       None is returned if the file is of a type that cannot be stored
       in the archive.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if (not self.dereference and inode in self.inodes
                and arcname != self.inodes[inode]):
            # Is it a hardlink to an already
            # archived file?
            # A file that is added a second time under the same name
            # is not: it would become a link that points to itself.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    # The names stay at the TarInfo defaults if the pwd/grp modules
    # are missing or do not know the id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1144
1145 def list(self, verbose=True):
1146 """Print a table of contents to sys.stdout. If `verbose' is False, only
1147 the names of the members are printed. If it is True, an `ls -l'-like
1148 output is produced.
1149 """
1150 self._check()
1151
1152 for tarinfo in self:
1153 if verbose:
1154 print filemode(tarinfo.mode),
1155 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1156 tarinfo.gname or tarinfo.gid),
1157 if tarinfo.ischr() or tarinfo.isblk():
1158 print "%10s" % ("%d,%d" \
1159 % (tarinfo.devmajor, tarinfo.devminor)),
1160 else:
1161 print "%10d" % tarinfo.size,
1162 print "%d-%02d-%02d %02d:%02d:%02d" \
1163 % time.localtime(tarinfo.mtime)[:6],
1164
1165 print tarinfo.name,
1166
1167 if verbose:
1168 if tarinfo.issym():
1169 print "->", tarinfo.linkname,
1170 if tarinfo.islnk():
1171 print "link to", tarinfo.linkname,
1172 print
1173
1174 def add(self, name, arcname=None, recursive=True):
1175 """Add the file `name' to the archive. `name' may be any type of file
1176 (directory, fifo, symbolic link, etc.). If given, `arcname'
1177 specifies an alternative name for the file in the archive.
1178 Directories are added recursively by default. This can be avoided by
1179 setting `recursive' to False.
1180 """
1181 self._check("aw")
1182
1183 if arcname is None:
1184 arcname = name
1185
1186 # Skip if somebody tries to archive the archive...
1187 if self.name is not None \
1188 and os.path.abspath(name) == os.path.abspath(self.name):
1189 self._dbg(2, "tarfile: Skipped %r" % name)
1190 return
1191
1192 # Special case: The user wants to add the current
1193 # working directory.
1194 if name == ".":
1195 if recursive:
1196 if arcname == ".":
1197 arcname = ""
1198 for f in os.listdir("."):
1199 self.add(f, os.path.join(arcname, f))
1200 return
1201
1202 self._dbg(1, name)
1203
1204 # Create a TarInfo object from the file.
1205 tarinfo = self.gettarinfo(name, arcname)
1206
1207 if tarinfo is None:
1208 self._dbg(1, "tarfile: Unsupported type %r" % name)
1209 return
1210
1211 # Append the tar header and data to the archive.
1212 if tarinfo.isreg():
1213 f = file(name, "rb")
1214 self.addfile(tarinfo, f)
1215 f.close()
1216
1217 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1218 tarinfo.size = 0L
1219 self.addfile(tarinfo)
1220
1221 if tarinfo.isdir():
1222 self.addfile(tarinfo)
1223 if recursive:
1224 for f in os.listdir(name):
1225 self.add(os.path.join(name, f), os.path.join(arcname, f))
1226
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
       ValueError is raised if the member is larger than MAXSIZE_MEMBER,
       or if a name or linkname is too long while self.posix is set.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError("file is too large (>8GB)")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # Without posix mode, store the full link name in a GNU longlink
        # member and keep a truncated copy in the regular header.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last "/" that lies within the first
            # LENGTH_PREFIX + 1 characters: head becomes the prefix field,
            # tail stays in the name field.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            head = head[:head.rfind("/") + 1]
            tail = tarinfo.name[len(head):]
            head = head[:-1]

            if not head or len(tail) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = tail
            tarinfo.prefix = head
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Header block first.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad the last block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, partial = divmod(tarinfo.size, BLOCKSIZE)
        if partial > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - partial))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
1291
1292 def extract(self, member, path=""):
1293 """Extract a member from the archive to the current working directory,
1294 using its full name. Its file information is extracted as accurately
1295 as possible. `member' may be a filename or a TarInfo object. You can
1296 specify a different directory using `path'.
1297 """
1298 self._check("r")
1299
1300 if isinstance(member, TarInfo):
1301 tarinfo = member
1302 else:
1303 tarinfo = self.getmember(member)
1304
1305 try:
1306 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1307 except EnvironmentError, e:
1308 if self.errorlevel > 0:
1309 raise
1310 else:
1311 if e.filename is None:
1312 self._dbg(1, "tarfile: %s" % e.strerror)
1313 else:
1314 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1315 except ExtractError, e:
1316 if self.errorlevel > 1:
1317 raise
1318 else:
1319 self._dbg(1, "tarfile: %s" % e)
1320
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type (which are treated as
    # regular files), are served directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object. The
        # target is searched among the members preceding the link.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
1359
1360 def _extract_member(self, tarinfo, targetpath):
1361 """Extract the TarInfo object tarinfo to a physical
1362 file called targetpath.
1363 """
1364 # Fetch the TarInfo object for the given name
1365 # and build the destination pathname, replacing
1366 # forward slashes to platform specific separators.
1367 if targetpath[-1:] == "/":
1368 targetpath = targetpath[:-1]
1369 targetpath = os.path.normpath(targetpath)
1370
1371 # Create all upper directories.
1372 upperdirs = os.path.dirname(targetpath)
1373 if upperdirs and not os.path.exists(upperdirs):
1374 ti = TarInfo()
1375 ti.name = upperdirs
1376 ti.type = DIRTYPE
1377 ti.mode = 0777
1378 ti.mtime = tarinfo.mtime
1379 ti.uid = tarinfo.uid
1380 ti.gid = tarinfo.gid
1381 ti.uname = tarinfo.uname
1382 ti.gname = tarinfo.gname
1383 try:
1384 self._extract_member(ti, ti.name)
1385 except:
1386 pass
1387
1388 if tarinfo.islnk() or tarinfo.issym():
1389 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1390 else:
1391 self._dbg(1, tarinfo.name)
1392
1393 if tarinfo.isreg():
1394 self.makefile(tarinfo, targetpath)
1395 elif tarinfo.isdir():
1396 self.makedir(tarinfo, targetpath)
1397 elif tarinfo.isfifo():
1398 self.makefifo(tarinfo, targetpath)
1399 elif tarinfo.ischr() or tarinfo.isblk():
1400 self.makedev(tarinfo, targetpath)
1401 elif tarinfo.islnk() or tarinfo.issym():
1402 self.makelink(tarinfo, targetpath)
1403 elif tarinfo.type not in SUPPORTED_TYPES:
1404 self.makeunknown(tarinfo, targetpath)
1405 else:
1406 self.makefile(tarinfo, targetpath)
1407
1408 self.chown(tarinfo, targetpath)
1409 if not tarinfo.issym():
1410 self.chmod(tarinfo, targetpath)
1411 self.utime(tarinfo, targetpath)
1412
1413 #--------------------------------------------------------------------------
1414 # Below are the different file methods. They are called via
1415 # _extract_member() when extract() is called. They can be replaced in a
1416 # subclass to implement other functionality.
1417
1418 def makedir(self, tarinfo, targetpath):
1419 """Make a directory called targetpath.
1420 """
1421 try:
1422 os.mkdir(targetpath)
1423 except EnvironmentError, e:
1424 if e.errno != errno.EEXIST:
1425 raise
1426
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
       member described by tarinfo.
       Both the source and the target file object are closed even if
       opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() is used on purpose: the name `open' is rebound to
        # TarFile.open at the bottom of this module.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1435
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is extracted like a regular file and
       a debug message is written afterwards.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1443
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised if the
       platform has no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1451
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
       ExtractError is raised if the platform lacks os.mknod() or
       os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    mode = tarinfo.mode | devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1466
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            make = os.symlink
        else:
            make = os.link
        make(linkpath, targetpath)
    except AttributeError:
        # No link support on this platform (the os function is missing).
        # A symlink target is relative to the directory of the link.
        if tarinfo.issym():
            linkdir = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(linkdir, linkpath))

        try:
            # First choice: extract the referenced member in its place.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the referenced file from the filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1492
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is done
       unless the effective user id is 0.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name first, then by numeric id, and finally
    # fall back to the group of the current process.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Same lookup order for the user.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1520
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
       ExtractError is raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001529
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.
       ExtractError is raised if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    on_windows = sys.platform == "win32"
    if on_windows and tarinfo.isdir():
        return
    # Access time and modification time are both set to mtime.
    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1543
1544 #--------------------------------------------------------------------------
1545
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.
       ReadError is raised if the very first block cannot be parsed as
       a header and self.ignore_zeros is not set.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.chunks[-1])
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the unusable block and try the next one.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.chunks[-1] == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
    else:
        tarinfo.offset_data = self.offset
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            self.offset += self._block(tarinfo.size)

    # A "regular file" whose name ends with a slash is really a
    # directory: some old tar programs don't know DIRTYPE.
    # (The last character is tested with [-1:]; [:-1] would compare
    # everything except the last character.)
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
    return tarinfo
1614
1615 #--------------------------------------------------------------------------
1616 # Below are some methods which are called for special typeflags in the
1617 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1618 # are registered in TYPE_METH below. You can register your own methods
1619 # with this mapping.
1620 # A registered method is called with a TarInfo object as only argument.
1621 #
1622 # During its execution the method MUST perform the following tasks:
1623 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1624 # if there is data to follow.
1625 # 2. set self.offset to the position where the next member's header will
1626 # begin.
1627 # 3. return a valid TarInfo object.
1628
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member.
       The returned TarInfo object describes the member whose header
       follows those blocks, with its name or linkname replaced by the
       long string.
    """
    # Read as many whole blocks as are needed to cover tarinfo.size.
    pieces = []
    remaining = tarinfo.size
    while remaining > 0:
        pieces.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    buf = "".join(pieces)

    name = None
    linkname = None
    if tarinfo.type == GNUTYPE_LONGNAME:
        name = nts(buf)
    if tarinfo.type == GNUTYPE_LONGLINK:
        linkname = nts(buf)

    # The next block is the header of the member the long string
    # belongs to.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    header.offset_data = self.offset
    header.name = name or header.name
    header.linkname = linkname or header.linkname

    if header.isreg() or header.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(header.size)
    return header
1661
1662 def proc_sparse(self, tarinfo):
1663 """Analyze a GNU sparse header plus extra headers.
1664 """
1665 buf = tarinfo.tobuf()
1666 sp = _ringbuffer()
1667 pos = 386
1668 lastpos = 0L
1669 realpos = 0L
1670 # There are 4 possible sparse structs in the
1671 # first header.
1672 for i in xrange(4):
1673 try:
1674 offset = int(buf[pos:pos + 12], 8)
1675 numbytes = int(buf[pos + 12:pos + 24], 8)
1676 except ValueError:
1677 break
1678 if offset > lastpos:
1679 sp.append(_hole(lastpos, offset - lastpos))
1680 sp.append(_data(offset, numbytes, realpos))
1681 realpos += numbytes
1682 lastpos = offset + numbytes
1683 pos += 24
1684
1685 isextended = ord(buf[482])
1686 origsize = int(buf[483:495], 8)
1687
1688 # If the isextended flag is given,
1689 # there are extra headers to process.
1690 while isextended == 1:
1691 buf = self.fileobj.read(BLOCKSIZE)
1692 self.offset += BLOCKSIZE
1693 pos = 0
1694 for i in xrange(21):
1695 try:
1696 offset = int(buf[pos:pos + 12], 8)
1697 numbytes = int(buf[pos + 12:pos + 24], 8)
1698 except ValueError:
1699 break
1700 if offset > lastpos:
1701 sp.append(_hole(lastpos, offset - lastpos))
1702 sp.append(_data(offset, numbytes, realpos))
1703 realpos += numbytes
1704 lastpos = offset + numbytes
1705 pos += 24
1706 isextended = ord(buf[504])
1707
1708 if lastpos < origsize:
1709 sp.append(_hole(lastpos, origsize - lastpos))
1710
1711 tarinfo.sparse = sp
1712
1713 tarinfo.offset_data = self.offset
1714 self.offset += self._block(tarinfo.size)
1715 tarinfo.size = origsize
1716 return tarinfo
1717
1718 # The type mapping for the next() method. The keys are single character
1719 # strings, the typeflag. The values are methods which are called when
1720 # next() encounters such a typeflag.
# Both GNU long string typeflags share one handler; proc_gnulong tells
# them apart by looking at tarinfo.type.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE: proc_sparse
}
1726
1727 #--------------------------------------------------------------------------
1728 # Little helper methods:
1729
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
       e.g. _block(834) => 1024.
    """
    excess = count % BLOCKSIZE
    if excess:
        # Fill up the last, partially used block.
        count += BLOCKSIZE - excess
    return count
1738
1739 def _getmember(self, name, tarinfo=None):
1740 """Find an archive member by name from bottom to top.
1741 If tarinfo is given, it is used as the starting point.
1742 """
1743 if tarinfo is None:
1744 end = len(self.members)
1745 else:
1746 end = self.members.index(tarinfo)
1747
1748 for i in xrange(end - 1, -1, -1):
1749 if name == self.membernames[i]:
1750 return self.members[i]
1751
1752 def _load(self):
1753 """Read through the entire archive file and look for readable
1754 members.
1755 """
1756 while True:
1757 tarinfo = self.next()
1758 if tarinfo is None:
1759 break
1760 self._loaded = True
1761
1762 def _check(self, mode=None):
1763 """Check if TarFile is still open, and if the operation's mode
1764 corresponds to TarFile's mode.
1765 """
1766 if self.closed:
1767 raise IOError, "%s is closed" % self.__class__.__name__
1768 if mode is not None and self._mode not in mode:
1769 raise IOError, "bad operation for mode %r" % self._mode
1770
1771 def __iter__(self):
1772 """Provide an iterator object.
1773 """
1774 if self._loaded:
1775 return iter(self.members)
1776 else:
1777 return TarIter(self)
1778
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       It consists of an extended tar header, with the length
       of the longname as size, followed by data blocks,
       which contain the longname as a null terminated string.
       `type' is the typeflag of the member (GNUTYPE_LONGNAME or
       GNUTYPE_LONGLINK). self.offset is advanced by the header block
       and by the data blocks written.
    """
    # The terminating NUL is part of the stored string and of its size.
    # Without it, a name that fills its last block exactly would not be
    # terminated at all.
    name += NUL

    tarinfo = TarInfo()
    tarinfo.name = "././@LongLink"
    tarinfo.type = type
    tarinfo.mode = 0
    tarinfo.size = len(name)

    # write extended header
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE
    # write name blocks, padding the last one with NULs
    self.fileobj.write(name)
    blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
    if remainder > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1800
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.
       The message is printed only if `level' does not exceed
       self.debug.
    """
    if self.debug < level:
        return
    print >> sys.stderr, msg
1806# class TarFile
1807
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def next(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        member = self.tarfile.next()
        if member:
            return member
        # Nothing left: the archive has been read completely.
        self.tarfile._loaded = True
        raise StopIteration
1832
1833# Helper classes for sparse file support
1834class _section:
1835 """Base class for _data and _hole.
1836 """
1837 def __init__(self, offset, size):
1838 self.offset = offset
1839 self.size = size
1840 def __contains__(self, offset):
1841 return self.offset <= offset < self.offset + self.size
1842
class _data(_section):
    """Represent a data section in a sparse file.
       In addition to offset and size it stores `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1849
class _hole(_section):
    """Represent a hole section in a sparse file.
       It adds nothing to _section; the class itself is the marker.
    """
1854
1855class _ringbuffer(list):
1856 """Ringbuffer class which increases performance
1857 over a regular list.
1858 """
1859 def __init__(self):
1860 self.idx = 0
1861 def find(self, offset):
1862 idx = self.idx
1863 while True:
1864 item = self[idx]
1865 if offset in item:
1866 break
1867 idx += 1
1868 if idx == len(self):
1869 idx = 0
1870 if idx == self.idx:
1871 # End of File
1872 return None
1873 self.idx = idx
1874 return item
1875
1876#---------------------------------------------
1877# zipfile compatible TarFile class
1878#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Select the opener that matches the compression constant.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names used by zipfile.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [m.name for m in self.infolist()]

    def infolist(self):
        # Only members whose type is in REGULAR_TYPES are reported.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        # No test is performed; always returns None.
        return None

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted but not used.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the zipfile style attributes into the TarInfo ones
        # that addfile() expects.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
1923#class TarFileCompat
1924
1925#--------------------
1926# exported functions
1927#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # `open' is TarFile.open here (see the assignment at the end of the
    # module). Only TarError is taken as "not a tar archive"; other
    # exceptions propagate to the caller.
    try:
        open(name).close()
    except TarError:
        return False
    return True
1938
# Module-level shortcut for TarFile.open. Note that this rebinds the name
# `open' for the whole module: code in this file that calls open(), such
# as is_tarfile(), gets TarFile.open and not the builtin.
open = TarFile.open