blob: 9dd8601cb63e445b58cae9001af8a44b2fa40367 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
if sys.platform == "mac":
    # Classic MacOS (pre-OS X) is not supported: pathname handling in this
    # module mostly assumes that replacing "/" by the local os.path.sep is
    # enough to convert a path, which does not hold for the mac
    # rooted:path:name versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
71NUL = "\0" # the null character
72BLOCKSIZE = 512 # length of processing blocks
73RECORDSIZE = BLOCKSIZE * 20 # length of records
74MAGIC = "ustar" # magic tar string
75VERSION = "00" # version number
76
77LENGTH_NAME = 100 # maximum length of a filename
78LENGTH_LINK = 100 # maximum length of a linkname
79LENGTH_PREFIX = 155 # maximum length of the prefix field
80MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
81
82REGTYPE = "0" # regular file
83AREGTYPE = "\0" # regular file
84LNKTYPE = "1" # link (inside tarfile)
85SYMTYPE = "2" # symbolic link
86CHRTYPE = "3" # character special device
87BLKTYPE = "4" # block special device
88DIRTYPE = "5" # directory
89FIFOTYPE = "6" # fifo special device
90CONTTYPE = "7" # contiguous file
91
92GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
93GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
94GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
99SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
101 CONTTYPE, CHRTYPE, BLKTYPE,
102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
103 GNUTYPE_SPARSE)
104
105REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
111S_IFLNK = 0120000 # symbolic link
112S_IFREG = 0100000 # regular file
113S_IFBLK = 0060000 # block device
114S_IFDIR = 0040000 # directory
115S_IFCHR = 0020000 # character device
116S_IFIFO = 0010000 # fifo
117
118TSUID = 04000 # set UID on execution
119TSGID = 02000 # set GID on execution
120TSVTX = 01000 # reserved
121
122TUREAD = 0400 # read by owner
123TUWRITE = 0200 # write by owner
124TUEXEC = 0100 # execute/search by owner
125TGREAD = 0040 # read by group
126TGWRITE = 0020 # write by group
127TGEXEC = 0010 # execute/search by group
128TOREAD = 0004 # read by other
129TOWRITE = 0002 # write by other
130TOEXEC = 0001 # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Return the part of string buffer `s' in front of its first NUL.

    A buffer that contains no NUL at all is returned unchanged.
    """
    end = s.find(NUL)
    if end < 0:
        return s
    return s[:end]
139
def calc_chksum(buf):
    """Return the checksum of a member's header block.

    buf is the 512 byte string buffer holding the header. The checksum
    is the plain sum of all byte values, where the 8 byte chksum field
    itself (offsets 148..155) counts as if it were filled with spaces.
    """
    blanks = 256                        # 8 * ord(" ") for the chksum field
    before = sum(map(ord, buf[:148]))   # everything in front of chksum
    after = sum(map(ord, buf[156:]))    # everything behind chksum
    return blanks + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy `length' bytes from file object src to file object dst.

    With length None the complete remaining content of src is copied.
    IOError is raised if src ends before `length' bytes could be read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    # First the whole BUFSIZE-sized chunks ...
    done = 0
    while done < full_chunks:
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
        done += 1

    # ... then whatever is left over.
    if tail != 0:
        chunk = src.read(tail)
        if len(chunk) < tail:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
175
# One entry per character of the mode string. Each entry is a flat
# sequence (bits, char, bits, char, ...) that is tried from left to right;
# the first pair whose bits are all set in the mode wins. Combined bit
# patterns therefore have to come BEFORE the patterns they contain,
# otherwise "s"/"t" could never be reached because the plain "x" would
# always match first.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for entry in filemode_table:
        # Walk the (bits, char) pairs of this position in priority order.
        for i in range(0, len(entry), 2):
            bits = entry[i]
            if mode & bits == bits:
                chars.append(entry[i + 1])
                break
        else:
            # none of the alternatives is set
            chars.append("-")
    return "".join(chars)
210
if os.sep == "/":
    # The native separator already is the one used inside archives.
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """General exception for errors during extraction."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
259class _Stream:
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
266
267 _Stream is intended to be used only internally.
268 """
269
270 def __init__(self, name, mode, type, fileobj, bufsize):
271 """Construct a _Stream object.
272 """
273 self._extfileobj = True
274 if fileobj is None:
275 fileobj = _LowLevelFile(name, mode)
276 self._extfileobj = False
277
278 self.name = name or ""
279 self.mode = mode
280 self.type = type
281 self.fileobj = fileobj
282 self.bufsize = bufsize
283 self.buf = ""
284 self.pos = 0L
285 self.closed = False
286
287 if type == "gz":
288 try:
289 import zlib
290 except ImportError:
291 raise CompressionError, "zlib module is not available"
292 self.zlib = zlib
293 self.crc = zlib.crc32("")
294 if mode == "r":
295 self._init_read_gz()
296 else:
297 self._init_write_gz()
298
299 if type == "bz2":
300 try:
301 import bz2
302 except ImportError:
303 raise CompressionError, "bz2 module is not available"
304 if mode == "r":
305 self.dbuf = ""
306 self.cmp = bz2.BZ2Decompressor()
307 else:
308 self.cmp = bz2.BZ2Compressor()
309
310 def __del__(self):
311 if not self.closed:
312 self.close()
313
314 def _init_write_gz(self):
315 """Initialize for writing with gzip compression.
316 """
317 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
318 -self.zlib.MAX_WBITS,
319 self.zlib.DEF_MEM_LEVEL,
320 0)
321 timestamp = struct.pack("<L", long(time.time()))
322 self.__write("\037\213\010\010%s\002\377" % timestamp)
323 if self.name.endswith(".gz"):
324 self.name = self.name[:-3]
325 self.__write(self.name + NUL)
326
327 def write(self, s):
328 """Write string s to the stream.
329 """
330 if self.type == "gz":
331 self.crc = self.zlib.crc32(s, self.crc)
332 self.pos += len(s)
333 if self.type != "tar":
334 s = self.cmp.compress(s)
335 self.__write(s)
336
337 def __write(self, s):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
340 """
341 self.buf += s
342 while len(self.buf) > self.bufsize:
343 self.fileobj.write(self.buf[:self.bufsize])
344 self.buf = self.buf[self.bufsize:]
345
346 def close(self):
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
349 """
350 if self.closed:
351 return
352
353 if self.mode == "w" and self.buf:
354 if self.type != "tar":
355 self.buf += self.cmp.flush()
356 self.fileobj.write(self.buf)
357 self.buf = ""
358 if self.type == "gz":
359 self.fileobj.write(struct.pack("<l", self.crc))
360 self.fileobj.write(struct.pack("<L", self.pos))
361
362 if not self._extfileobj:
363 self.fileobj.close()
364
365 self.closed = True
366
367 def _init_read_gz(self):
368 """Initialize for reading a gzip compressed fileobj.
369 """
370 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
371 self.dbuf = ""
372
373 # taken from gzip.GzipFile with some alterations
374 if self.__read(2) != "\037\213":
375 raise ReadError, "not a gzip file"
376 if self.__read(1) != "\010":
377 raise CompressionError, "unsupported compression method"
378
379 flag = ord(self.__read(1))
380 self.__read(6)
381
382 if flag & 4:
383 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
384 self.read(xlen)
385 if flag & 8:
386 while True:
387 s = self.__read(1)
388 if not s or s == NUL:
389 break
390 if flag & 16:
391 while True:
392 s = self.__read(1)
393 if not s or s == NUL:
394 break
395 if flag & 2:
396 self.__read(2)
397
398 def tell(self):
399 """Return the stream's file pointer position.
400 """
401 return self.pos
402
403 def seek(self, pos=0):
404 """Set the stream's file pointer to pos. Negative seeking
405 is forbidden.
406 """
407 if pos - self.pos >= 0:
408 blocks, remainder = divmod(pos - self.pos, self.bufsize)
409 for i in xrange(blocks):
410 self.read(self.bufsize)
411 self.read(remainder)
412 else:
413 raise StreamError, "seeking backwards is not allowed"
414 return self.pos
415
416 def read(self, size=None):
417 """Return the next size number of bytes from the stream.
418 If size is not defined, return all bytes of the stream
419 up to EOF.
420 """
421 if size is None:
422 t = []
423 while True:
424 buf = self._read(self.bufsize)
425 if not buf:
426 break
427 t.append(buf)
428 buf = "".join(t)
429 else:
430 buf = self._read(size)
431 self.pos += len(buf)
432 return buf
433
434 def _read(self, size):
435 """Return size bytes from the stream.
436 """
437 if self.type == "tar":
438 return self.__read(size)
439
440 c = len(self.dbuf)
441 t = [self.dbuf]
442 while c < size:
443 buf = self.__read(self.bufsize)
444 if not buf:
445 break
446 buf = self.cmp.decompress(buf)
447 t.append(buf)
448 c += len(buf)
449 t = "".join(t)
450 self.dbuf = t[size:]
451 return t[:size]
452
453 def __read(self, size):
454 """Return size bytes from stream. If internal buffer is empty,
455 read another block from the stream.
456 """
457 c = len(self.buf)
458 t = [self.buf]
459 while c < size:
460 buf = self.fileobj.read(self.bufsize)
461 if not buf:
462 break
463 t.append(buf)
464 c += len(buf)
465 t = "".join(t)
466 self.buf = t[size:]
467 return t[:size]
468# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a read-only view on member `tarinfo' that reads its
           data from the file object of `tarfile'.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the data in the archive
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # read-ahead used by readline()
        # read() is bound per instance, depending on the member type.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           A complete line is returned with trailing carriage returns
           removed and terminated by a single newline. If no complete
           line fits into size characters, at most size characters are
           returned as they are and the rest stays buffered.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        # Read ahead until a newline shows up, the buffer covers the
        # requested size or the member is exhausted. The amount requested
        # from read() is always positive.
        while nl < 0 and len(self.linebuffer) < size:
            buf = self.read(min(size - len(self.linebuffer), 100))
            if not buf:
                break
            self.linebuffer += buf
            nl = self.linebuffer.find("\n")

        if nl < 0 or nl >= size:
            # No complete line within the limit: hand out what is allowed
            # and keep everything else for the next call, so that no
            # character gets lost.
            s = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.
        """
        if self.closed:
            raise ValueError("file is closed")
        # The archive's file object is shared, so it is repositioned
        # before every read.
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # never read across the end of the current section
        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # any other section is not read from the archive but
            # returned as a run of NULs
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. The resulting position is
           clamped to the range 0..size. whence is 0 (absolute),
           1 (relative to the current position) or 2 (relative to the end).
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        elif whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        elif whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
615
616#------------------
617# Exported Classes
618#------------------
619class TarInfo(object):
620 """Informational class which holds the details about an
621 archive member given by a tar header block.
622 TarInfo objects are returned by TarFile.getmember(),
623 TarFile.getmembers() and TarFile.gettarinfo() and are
624 usually created internally.
625 """
626
627 def __init__(self, name=""):
628 """Construct a TarInfo object. name is the optional name
629 of the member.
630 """
631
632 self.name = name # member name (dirnames must end with '/')
633 self.mode = 0666 # file permissions
634 self.uid = 0 # user id
635 self.gid = 0 # group id
636 self.size = 0 # file size
637 self.mtime = 0 # modification time
638 self.chksum = 0 # header checksum
639 self.type = REGTYPE # member type
640 self.linkname = "" # link name
641 self.uname = "user" # user name
642 self.gname = "group" # group name
643 self.devmajor = 0 #-
644 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
645 self.prefix = "" # prefix to filename or holding information
646 # about sparse files
647
648 self.offset = 0 # the tar header starts here
649 self.offset_data = 0 # the file's data starts here
650
651 def __repr__(self):
652 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
653
654 def frombuf(cls, buf):
655 """Construct a TarInfo object from a 512 byte string buffer.
656 """
657 tarinfo = cls()
658 tarinfo.name = nts(buf[0:100])
659 tarinfo.mode = int(buf[100:108], 8)
660 tarinfo.uid = int(buf[108:116],8)
661 tarinfo.gid = int(buf[116:124],8)
662 tarinfo.size = long(buf[124:136], 8)
663 tarinfo.mtime = long(buf[136:148], 8)
664 tarinfo.chksum = int(buf[148:156], 8)
665 tarinfo.type = buf[156:157]
666 tarinfo.linkname = nts(buf[157:257])
667 tarinfo.uname = nts(buf[265:297])
668 tarinfo.gname = nts(buf[297:329])
669 try:
670 tarinfo.devmajor = int(buf[329:337], 8)
671 tarinfo.devminor = int(buf[337:345], 8)
672 except ValueError:
673 tarinfo.devmajor = tarinfo.devmajor = 0
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000674 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000675
676 # The prefix field is used for filenames > 100 in
677 # the POSIX standard.
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000678 # name = prefix + '/' + name
679 if tarinfo.type != GNUTYPE_SPARSE:
680 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000681
682 # Directory names should have a '/' at the end.
683 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
684 tarinfo.name += "/"
685 return tarinfo
686
687 frombuf = classmethod(frombuf)
688
689 def tobuf(self):
690 """Return a tar header block as a 512 byte string.
691 """
692 name = self.name
693
694 # The following code was contributed by Detlef Lannert.
695 parts = []
696 for value, fieldsize in (
697 (name, 100),
698 ("%07o" % (self.mode & 07777), 8),
699 ("%07o" % self.uid, 8),
700 ("%07o" % self.gid, 8),
701 ("%011o" % self.size, 12),
702 ("%011o" % self.mtime, 12),
703 (" ", 8),
704 (self.type, 1),
705 (self.linkname, 100),
706 (MAGIC, 6),
707 (VERSION, 2),
708 (self.uname, 32),
709 (self.gname, 32),
710 ("%07o" % self.devmajor, 8),
711 ("%07o" % self.devminor, 8),
712 (self.prefix, 155)
713 ):
714 l = len(value)
715 parts.append(value + (fieldsize - l) * NUL)
716
717 buf = "".join(parts)
718 chksum = calc_chksum(buf)
719 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
720 buf += (BLOCKSIZE - len(buf)) * NUL
721 self.buf = buf
722 return buf
723
724 def isreg(self):
725 return self.type in REGULAR_TYPES
726 def isfile(self):
727 return self.isreg()
728 def isdir(self):
729 return self.type == DIRTYPE
730 def issym(self):
731 return self.type == SYMTYPE
732 def islnk(self):
733 return self.type == LNKTYPE
734 def ischr(self):
735 return self.type == CHRTYPE
736 def isblk(self):
737 return self.type == BLKTYPE
738 def isfifo(self):
739 return self.type == FIFOTYPE
740 def issparse(self):
741 return self.type == GNUTYPE_SPARSE
742 def isdev(self):
743 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
744# class TarInfo
745
746class TarFile(object):
747 """The TarFile Class provides an interface to tar archives.
748 """
749
750 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
751
752 dereference = False # If true, add content of linked file to the
753 # tar file, else the link.
754
755 ignore_zeros = False # If true, skips empty or invalid blocks and
756 # continues processing.
757
758 errorlevel = 0 # If 0, fatal errors only appear in debug
759 # messages (if debug >= 0). If > 0, errors
760 # are passed to the caller as exceptions.
761
762 posix = True # If True, generates POSIX.1-1990-compliant
763 # archives (no GNU extensions!)
764
765 fileobject = ExFileObject
766
767 def __init__(self, name=None, mode="r", fileobj=None):
768 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
769 read from an existing archive, 'a' to append data to an existing
770 file or 'w' to create a new file overwriting an existing one. `mode'
771 defaults to 'r'.
772 If `fileobj' is given, it is used for reading or writing data. If it
773 can be determined, `mode' is overridden by `fileobj's mode.
774 `fileobj' is not closed, when TarFile is closed.
775 """
776 self.name = name
777
778 if len(mode) > 1 or mode not in "raw":
779 raise ValueError, "mode must be 'r', 'a' or 'w'"
780 self._mode = mode
781 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
782
783 if not fileobj:
784 fileobj = file(self.name, self.mode)
785 self._extfileobj = False
786 else:
787 if self.name is None and hasattr(fileobj, "name"):
788 self.name = fileobj.name
789 if hasattr(fileobj, "mode"):
790 self.mode = fileobj.mode
791 self._extfileobj = True
792 self.fileobj = fileobj
793
794 # Init datastructures
795 self.closed = False
796 self.members = [] # list of members as TarInfo objects
797 self.membernames = [] # names of members
798 self.chunks = [0] # chunk cache
799 self._loaded = False # flag if all members have been read
800 self.offset = 0L # current position in the archive file
801 self.inodes = {} # dictionary caching the inodes of
802 # archive members already added
803
804 if self._mode == "r":
805 self.firstmember = None
806 self.firstmember = self.next()
807
808 if self._mode == "a":
809 # Move to the end of the archive,
810 # before the first empty block.
811 self.firstmember = None
812 while True:
813 try:
814 tarinfo = self.next()
815 except ReadError:
816 self.fileobj.seek(0)
817 break
818 if tarinfo is None:
819 self.fileobj.seek(- BLOCKSIZE, 1)
820 break
821
822 if self._mode in "aw":
823 self._loaded = True
824
825 #--------------------------------------------------------------------------
826 # Below are the classmethods which act as alternate constructors to the
827 # TarFile class. The open() method is the only one that is needed for
828 # public use; it is the "super"-constructor and is able to select an
829 # adequate "sub"-constructor for a particular compression using the mapping
830 # from OPEN_METH.
831 #
832 # This concept allows one to subclass TarFile without losing the comfort of
833 # the super-constructor. A sub-constructor is registered and made available
834 # by adding it to the mapping in OPEN_METH.
835
836 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
837 """Open a tar archive for reading, writing or appending. Return
838 an appropriate TarFile class.
839
840 mode:
841 'r' open for reading with transparent compression
842 'r:' open for reading exclusively uncompressed
843 'r:gz' open for reading with gzip compression
844 'r:bz2' open for reading with bzip2 compression
845 'a' or 'a:' open for appending
846 'w' or 'w:' open for writing without compression
847 'w:gz' open for writing with gzip compression
848 'w:bz2' open for writing with bzip2 compression
849 'r|' open an uncompressed stream of tar blocks for reading
850 'r|gz' open a gzip compressed stream of tar blocks
851 'r|bz2' open a bzip2 compressed stream of tar blocks
852 'w|' open an uncompressed stream for writing
853 'w|gz' open a gzip compressed stream for writing
854 'w|bz2' open a bzip2 compressed stream for writing
855 """
856
857 if not name and not fileobj:
858 raise ValueError, "nothing to open"
859
860 if ":" in mode:
861 filemode, comptype = mode.split(":", 1)
862 filemode = filemode or "r"
863 comptype = comptype or "tar"
864
865 # Select the *open() function according to
866 # given compression.
867 if comptype in cls.OPEN_METH:
868 func = getattr(cls, cls.OPEN_METH[comptype])
869 else:
870 raise CompressionError, "unknown compression type %r" % comptype
871 return func(name, filemode, fileobj)
872
873 elif "|" in mode:
874 filemode, comptype = mode.split("|", 1)
875 filemode = filemode or "r"
876 comptype = comptype or "tar"
877
878 if filemode not in "rw":
879 raise ValueError, "mode must be 'r' or 'w'"
880
881 t = cls(name, filemode,
882 _Stream(name, filemode, comptype, fileobj, bufsize))
883 t._extfileobj = False
884 return t
885
886 elif mode == "r":
887 # Find out which *open() is appropriate for opening the file.
888 for comptype in cls.OPEN_METH:
889 func = getattr(cls, cls.OPEN_METH[comptype])
890 try:
891 return func(name, "r", fileobj)
892 except (ReadError, CompressionError):
893 continue
894 raise ReadError, "file could not be opened successfully"
895
896 elif mode in "aw":
897 return cls.taropen(name, mode, fileobj)
898
899 raise ValueError, "undiscernible mode"
900
901 open = classmethod(open)
902
903 def taropen(cls, name, mode="r", fileobj=None):
904 """Open uncompressed tar archive name for reading or writing.
905 """
906 if len(mode) > 1 or mode not in "raw":
907 raise ValueError, "mode must be 'r', 'a' or 'w'"
908 return cls(name, mode, fileobj)
909
910 taropen = classmethod(taropen)
911
912 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
913 """Open gzip compressed tar archive name for reading or writing.
914 Appending is not allowed.
915 """
916 if len(mode) > 1 or mode not in "rw":
917 raise ValueError, "mode must be 'r' or 'w'"
918
919 try:
920 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000921 gzip.GzipFile
922 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000923 raise CompressionError, "gzip module is not available"
924
925 pre, ext = os.path.splitext(name)
926 pre = os.path.basename(pre)
927 if ext == ".tgz":
928 ext = ".tar"
929 if ext == ".gz":
930 ext = ""
931 tarname = pre + ext
932
933 if fileobj is None:
934 fileobj = file(name, mode + "b")
935
936 if mode != "r":
937 name = tarname
938
939 try:
940 t = cls.taropen(tarname, mode,
941 gzip.GzipFile(name, mode, compresslevel, fileobj)
942 )
943 except IOError:
944 raise ReadError, "not a gzip file"
945 t._extfileobj = False
946 return t
947
948 gzopen = classmethod(gzopen)
949
950 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
951 """Open bzip2 compressed tar archive name for reading or writing.
952 Appending is not allowed.
953 """
954 if len(mode) > 1 or mode not in "rw":
955 raise ValueError, "mode must be 'r' or 'w'."
956
957 try:
958 import bz2
959 except ImportError:
960 raise CompressionError, "bz2 module is not available"
961
962 pre, ext = os.path.splitext(name)
963 pre = os.path.basename(pre)
964 if ext == ".tbz2":
965 ext = ".tar"
966 if ext == ".bz2":
967 ext = ""
968 tarname = pre + ext
969
970 if fileobj is not None:
971 raise ValueError, "no support for external file objects"
972
973 try:
974 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
975 except IOError:
976 raise ReadError, "not a bzip2 file"
977 t._extfileobj = False
978 return t
979
980 bz2open = classmethod(bz2open)
981
982 # All *open() methods are registered here.
983 OPEN_METH = {
984 "tar": "taropen", # uncompressed tar
985 "gz": "gzopen", # gzip compressed tar
986 "bz2": "bz2open" # bzip2 compressed tar
987 }
988
989 #--------------------------------------------------------------------------
990 # The public methods which TarFile provides:
991
992 def close(self):
993 """Close the TarFile. In write-mode, two finishing zero blocks are
994 appended to the archive.
995 """
996 if self.closed:
997 return
998
999 if self._mode in "aw":
1000 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1001 self.offset += (BLOCKSIZE * 2)
1002 # fill up the end with zero-blocks
1003 # (like option -b20 for tar does)
1004 blocks, remainder = divmod(self.offset, RECORDSIZE)
1005 if remainder > 0:
1006 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1007
1008 if not self._extfileobj:
1009 self.fileobj.close()
1010 self.closed = True
1011
1012 def getmember(self, name):
1013 """Return a TarInfo object for member `name'. If `name' can not be
1014 found in the archive, KeyError is raised. If a member occurs more
1015 than once in the archive, its last occurence is assumed to be the
1016 most up-to-date version.
1017 """
1018 self._check()
1019 if name not in self.membernames and not self._loaded:
1020 self._load()
1021 if name not in self.membernames:
1022 raise KeyError, "filename %r not found" % name
1023 return self._getmember(name)
1024
1025 def getmembers(self):
1026 """Return the members of the archive as a list of TarInfo objects. The
1027 list has the same order as the members in the archive.
1028 """
1029 self._check()
1030 if not self._loaded: # if we want to obtain a list of
1031 self._load() # all members, we first have to
1032 # scan the whole archive.
1033 return self.members
1034
1035 def getnames(self):
1036 """Return the members of the archive as a list of their names. It has
1037 the same order as the list returned by getmembers().
1038 """
1039 self._check()
1040 if not self._loaded:
1041 self._load()
1042 return self.membernames
1043
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing either the file `name' or the
       open file object `fileobj' (whose descriptor is passed to
       os.fstat). The result may be modified before it is handed to
       addfile(). `arcname', if given, is the name the member gets in
       the archive. None is returned for file types that cannot be
       stored.
    """
    self._check("aw")

    # A file object always supplies its own name.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalize it, drop a drive
    # specification and make absolute paths relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    tarinfo = TarInfo()

    # Choose fstat, lstat or stat depending on what we were given,
    # on the platform and on whether symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    linkname = ""
    st_mode = statres.st_mode

    if stat.S_ISREG(st_mode):
        inode = (statres.st_ino, statres.st_dev)
        if inode in self.inodes and not self.dereference:
            # A file with this inode has been archived before,
            # so store it as a hardlink to that member.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            ftype = REGTYPE
            # Remember the inode only if it is valid
            # (on win32 it is always 0).
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(st_mode):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(st_mode):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        ftype = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        ftype = BLKTYPE
    else:
        # Anything else cannot be represented.
        return None

    # Copy everything we found out into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = st_mode
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname

    # Symbolic owner names are optional; unknown ids are left alone.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    # Device numbers are recorded only where the platform can
    # split st_rdev into its major and minor parts.
    if ftype in (CHRTYPE, BLKTYPE) \
       and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1139
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    # Every member is printed on one line: the trailing commas keep the
    # print statements on the same line, the bare print ends it.
    for tarinfo in self:
        if verbose:
            # Permission string, followed by owner/group (the symbolic
            # names are preferred, the numeric ids are the fallback).
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Devices show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Modification time in local time.
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            # Show the target of symbolic and hard links.
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
1168
1169 def add(self, name, arcname=None, recursive=True):
1170 """Add the file `name' to the archive. `name' may be any type of file
1171 (directory, fifo, symbolic link, etc.). If given, `arcname'
1172 specifies an alternative name for the file in the archive.
1173 Directories are added recursively by default. This can be avoided by
1174 setting `recursive' to False.
1175 """
1176 self._check("aw")
1177
1178 if arcname is None:
1179 arcname = name
1180
1181 # Skip if somebody tries to archive the archive...
1182 if self.name is not None \
1183 and os.path.abspath(name) == os.path.abspath(self.name):
1184 self._dbg(2, "tarfile: Skipped %r" % name)
1185 return
1186
1187 # Special case: The user wants to add the current
1188 # working directory.
1189 if name == ".":
1190 if recursive:
1191 if arcname == ".":
1192 arcname = ""
1193 for f in os.listdir("."):
1194 self.add(f, os.path.join(arcname, f))
1195 return
1196
1197 self._dbg(1, name)
1198
1199 # Create a TarInfo object from the file.
1200 tarinfo = self.gettarinfo(name, arcname)
1201
1202 if tarinfo is None:
1203 self._dbg(1, "tarfile: Unsupported type %r" % name)
1204 return
1205
1206 # Append the tar header and data to the archive.
1207 if tarinfo.isreg():
1208 f = file(name, "rb")
1209 self.addfile(tarinfo, f)
1210 f.close()
1211
1212 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1213 tarinfo.size = 0L
1214 self.addfile(tarinfo)
1215
1216 if tarinfo.isdir():
1217 self.addfile(tarinfo)
1218 if recursive:
1219 for f in os.listdir(name):
1220 self.add(os.path.join(name, f), os.path.join(arcname, f))
1221
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive. When
       `fileobj' is given, tarinfo.size bytes are read from it and stored
       as the member's data. TarInfo objects can be obtained from
       gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
       ValueError is raised if the member is too large or if a name does
       not fit into the header in posix mode.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError("file is too large (>8GB)")

    # An overlong link target is an error in posix mode, otherwise
    # it goes into a preceding GNU LONGLINK member.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name behind the last "/" that lies within the
            # first LENGTH_PREFIX + 1 characters; the front part (without
            # that slash) becomes the prefix.
            fullname = tarinfo.name
            cut = fullname[:LENGTH_PREFIX + 1].rfind("/") + 1
            name = fullname[cut:]
            prefix = fullname[:cut][:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header block.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # Write the data, if any, padded with NULs to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        remainder = tarinfo.size % BLOCKSIZE
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        self.offset += self._block(tarinfo.size)

    # Register the new member.
    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
1286
1287 def extract(self, member, path=""):
1288 """Extract a member from the archive to the current working directory,
1289 using its full name. Its file information is extracted as accurately
1290 as possible. `member' may be a filename or a TarInfo object. You can
1291 specify a different directory using `path'.
1292 """
1293 self._check("r")
1294
1295 if isinstance(member, TarInfo):
1296 tarinfo = member
1297 else:
1298 tarinfo = self.getmember(member)
1299
1300 try:
1301 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1302 except EnvironmentError, e:
1303 if self.errorlevel > 0:
1304 raise
1305 else:
1306 if e.filename is None:
1307 self._dbg(1, "tarfile: %s" % e.strerror)
1308 else:
1309 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1310 except ExtractError, e:
1311 if self.errorlevel > 1:
1312 raise
1313 else:
1314 self._dbg(1, "tarfile: %s" % e)
1315
def extractfile(self, member):
    """Return a file-like object for a member of the archive. `member'
       is either a filename or a TarInfo object. Regular files and members
       of unknown type yield a file-like object for their data, links
       yield the file-like object of their target, everything else yields
       None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type (which are treated
    # like regular files), carry data of their own.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices etc. have no data associated with them.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries to
    # extract a (sym)link as a file object from a non-seekable stream
    # of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1354
1355 def _extract_member(self, tarinfo, targetpath):
1356 """Extract the TarInfo object tarinfo to a physical
1357 file called targetpath.
1358 """
1359 # Fetch the TarInfo object for the given name
1360 # and build the destination pathname, replacing
1361 # forward slashes to platform specific separators.
1362 if targetpath[-1:] == "/":
1363 targetpath = targetpath[:-1]
1364 targetpath = os.path.normpath(targetpath)
1365
1366 # Create all upper directories.
1367 upperdirs = os.path.dirname(targetpath)
1368 if upperdirs and not os.path.exists(upperdirs):
1369 ti = TarInfo()
1370 ti.name = upperdirs
1371 ti.type = DIRTYPE
1372 ti.mode = 0777
1373 ti.mtime = tarinfo.mtime
1374 ti.uid = tarinfo.uid
1375 ti.gid = tarinfo.gid
1376 ti.uname = tarinfo.uname
1377 ti.gname = tarinfo.gname
1378 try:
1379 self._extract_member(ti, ti.name)
1380 except:
1381 pass
1382
1383 if tarinfo.islnk() or tarinfo.issym():
1384 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1385 else:
1386 self._dbg(1, tarinfo.name)
1387
1388 if tarinfo.isreg():
1389 self.makefile(tarinfo, targetpath)
1390 elif tarinfo.isdir():
1391 self.makedir(tarinfo, targetpath)
1392 elif tarinfo.isfifo():
1393 self.makefifo(tarinfo, targetpath)
1394 elif tarinfo.ischr() or tarinfo.isblk():
1395 self.makedev(tarinfo, targetpath)
1396 elif tarinfo.islnk() or tarinfo.issym():
1397 self.makelink(tarinfo, targetpath)
1398 elif tarinfo.type not in SUPPORTED_TYPES:
1399 self.makeunknown(tarinfo, targetpath)
1400 else:
1401 self.makefile(tarinfo, targetpath)
1402
1403 self.chown(tarinfo, targetpath)
1404 if not tarinfo.issym():
1405 self.chmod(tarinfo, targetpath)
1406 self.utime(tarinfo, targetpath)
1407
1408 #--------------------------------------------------------------------------
1409 # Below are the different file methods. They are called via
1410 # _extract_member() when extract() is called. They can be replaced in a
1411 # subclass to implement other functionality.
1412
1413 def makedir(self, tarinfo, targetpath):
1414 """Make a directory called targetpath.
1415 """
1416 try:
1417 os.mkdir(targetpath)
1418 except EnvironmentError, e:
1419 if e.errno != errno.EEXIST:
1420 raise
1421
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member tarinfo.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            # Never leave the target open, even if copying failed.
            target.close()
    finally:
        # The source is closed as well if the target could not be
        # opened or written.
        source.close()
1430
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that in a debug message.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1438
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. ExtractError is raised on
       platforms without os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1446
def makedev(self, tarinfo, targetpath):
    """Create a character or block device called targetpath.
       ExtractError is raised on platforms that lack os.mknod or
       os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching file type bit.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1461
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called targetpath. On platforms
       that cannot create links, a copy of the file the link refers to
       is made instead.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            os.link(source, targetpath)
    except AttributeError:
        # os.symlink / os.link are missing on this platform.
        if tarinfo.issym():
            # A symlink target is relative to the link's directory.
            source = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                           source))

        try:
            # First choice: extract the referenced member once more.
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the referenced file from disk.
            source = os.path.normpath(source)
            try:
                shutil.copy2(source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1487
def chown(self, tarinfo, targetpath):
    """Change the owner of targetpath to the one recorded in tarinfo.
       Nothing happens unless the process runs with an effective user id
       of 0. ExtractError is raised if the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then our own group.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Resolve the user the same way.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1515
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Nothing happens on platforms without os.chmod. ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001524
def utime(self, tarinfo, targetpath):
    """Apply the modification time stored in tarinfo to targetpath.
       Nothing happens on platforms without os.utime. ExtractError is
       raised if the time cannot be changed.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories, so those are left alone on win32.
    if not hasattr(os, 'utime') \
       or (sys.platform == "win32" and tarinfo.isdir()):
        return
    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1538
1539 #--------------------------------------------------------------------------
1540
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available. ReadError is raised if the very first block cannot be
       interpreted as a tar header.
    """
    self._check("ra")
    # A member that was read ahead earlier is handed out first.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.chunks[-1])
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Report the unusable block and try the next one.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.chunks[-1] == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
    else:
        tarinfo.offset_data = self.offset
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and store directories
    # as regular files whose name ends with a slash. Look at the LAST
    # character of the name to recognize them.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
    return tarinfo
1609
1610 #--------------------------------------------------------------------------
1611 # Below are some methods which are called for special typeflags in the
1612 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1613 # are registered in TYPE_METH below. You can register your own methods
1614 # with this mapping.
1615 # A registered method is called with a TarInfo object as only argument.
1616 #
1617 # During its execution the method MUST perform the following tasks:
1618 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1619 # if there is data to follow.
1620 # 2. set self.offset to the position where the next member's header will
1621 # begin.
1622 # 3. return a valid TarInfo object.
1623
def proc_gnulong(self, tarinfo):
    """Process a GNU longname or longlink member: read the long string
       from its data blocks and return the member that follows, with
       that string as its name or linkname.
    """
    # Collect all data blocks of the extended member.
    blocks = []
    remaining = tarinfo.size
    while remaining > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    data = "".join(blocks)

    name = None
    linkname = None
    if tarinfo.type == GNUTYPE_LONGNAME:
        name = nts(data)
    if tarinfo.type == GNUTYPE_LONGLINK:
        linkname = nts(data)

    # The next header describes the real member.
    header = self.fileobj.read(BLOCKSIZE)
    member = TarInfo.frombuf(header)
    member.offset = self.offset
    self.offset += BLOCKSIZE
    member.offset_data = self.offset
    member.name = name or member.name
    member.linkname = linkname or member.linkname

    if member.isreg() or member.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(member.size)
    return member
1656
def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

       The sparse structs are turned into a _ringbuffer of _hole and
       _data sections which is stored as tarinfo.sparse. tarinfo.size is
       replaced by the original file size read from the header. The
       updated tarinfo is returned.
    """
    buf = tarinfo.tobuf()
    sp = _ringbuffer()
    pos = 386        # position of the first sparse struct in the header
    lastpos = 0L     # end of the last section in the expanded file
    realpos = 0L     # running total of data bytes, passed to _data()
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        # Each struct consists of two 12 character octal fields.
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            # A field that is not an octal number ends the list.
            break
        # A gap between two data sections is a hole.
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # Up to 21 structs are read from an extra header.
        for i in xrange(21):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # The extra header carries its own isextended flag.
        isextended = ord(buf[504])

    # Anything between the last section and the original
    # size is a trailing hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # Skip the data blocks using the size stored in the header, then
    # replace it by the original size.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize
    return tarinfo
1712
# The type mapping for the next() method. The keys are single character
# strings, the typeflag. The values are methods which are called when
# next() encounters such a typeflag. next() calls them with the TarFile
# and the TarInfo object just read and uses the TarInfo object they
# return.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,     # long member name
    GNUTYPE_LONGLINK: proc_gnulong,     # long link name
    GNUTYPE_SPARSE:   proc_sparse       # sparse file
}
1721
1722 #--------------------------------------------------------------------------
1723 # Little helper methods:
1724
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    full_blocks = (count + BLOCKSIZE - 1) // BLOCKSIZE
    return full_blocks * BLOCKSIZE
1733
1734 def _getmember(self, name, tarinfo=None):
1735 """Find an archive member by name from bottom to top.
1736 If tarinfo is given, it is used as the starting point.
1737 """
1738 if tarinfo is None:
1739 end = len(self.members)
1740 else:
1741 end = self.members.index(tarinfo)
1742
1743 for i in xrange(end - 1, -1, -1):
1744 if name == self.membernames[i]:
1745 return self.members[i]
1746
1747 def _load(self):
1748 """Read through the entire archive file and look for readable
1749 members.
1750 """
1751 while True:
1752 tarinfo = self.next()
1753 if tarinfo is None:
1754 break
1755 self._loaded = True
1756
1757 def _check(self, mode=None):
1758 """Check if TarFile is still open, and if the operation's mode
1759 corresponds to TarFile's mode.
1760 """
1761 if self.closed:
1762 raise IOError, "%s is closed" % self.__class__.__name__
1763 if mode is not None and self._mode not in mode:
1764 raise IOError, "bad operation for mode %r" % self._mode
1765
1766 def __iter__(self):
1767 """Provide an iterator object.
1768 """
1769 if self._loaded:
1770 return iter(self.members)
1771 else:
1772 return TarIter(self)
1773
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       It consists of an extended tar header, with the length
       of the longname as size, followed by data blocks,
       which contain the longname as a null terminated string.
       `type' is the typeflag of the member (GNUTYPE_LONGNAME or
       GNUTYPE_LONGLINK).
    """
    # The terminating NUL is part of the data, so it is counted in
    # the size field. Without it, a name whose length is a multiple
    # of BLOCKSIZE would not be terminated at all.
    name += NUL

    tarinfo = TarInfo()
    tarinfo.name = "././@LongLink"
    tarinfo.type = type
    tarinfo.mode = 0
    tarinfo.size = len(name)

    # write extended header
    self.fileobj.write(tarinfo.tobuf())
    # The header occupies a block of its own, which has to be
    # accounted for to keep self.offset in step with the file.
    self.offset += BLOCKSIZE
    # write name blocks
    self.fileobj.write(name)
    blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
    if remainder > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1795
def _dbg(self, level, msg):
    """Print msg to sys.stderr, unless level is above the debug
       level of the TarFile.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1801# class TarFile
1802
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for the TarFile object tarfile.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """An iterator is its own iterator object.
        """
        return self

    def next(self):
        """Fetch the next member with TarFile's next() method. Once
           there is none left, flag the TarFile as _loaded and stop.
        """
        tarinfo = self.tarfile.next()
        if tarinfo:
            return tarinfo
        self.tarfile._loaded = True
        raise StopIteration
1827
1828# Helper classes for sparse file support
1829class _section:
1830 """Base class for _data and _hole.
1831 """
1832 def __init__(self, offset, size):
1833 self.offset = offset
1834 self.size = size
1835 def __contains__(self, offset):
1836 return self.offset <= offset < self.offset + self.size
1837
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it stores the value realpos.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1844
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section, it only serves to tell holes from data.
    """
1849
1850class _ringbuffer(list):
1851 """Ringbuffer class which increases performance
1852 over a regular list.
1853 """
1854 def __init__(self):
1855 self.idx = 0
1856 def find(self, offset):
1857 idx = self.idx
1858 while True:
1859 item = self[idx]
1860 if offset in item:
1861 break
1862 idx += 1
1863 if idx == len(self):
1864 idx = 0
1865 if idx == self.idx:
1866 # End of File
1867 return None
1868 self.idx = idx
1869 return item
1870
1871#---------------------------------------------
1872# zipfile compatible TarFile class
1873#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       ZipFile class of the standard module zipfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        elif compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names that
            # zipfile's ZipInfo objects use.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [member.name for member in self.infolist()]

    def infolist(self):
        # Only members of a regular file type are reported.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the ZipInfo style attributes to those of TarInfo.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
#class TarFileCompat
1919
1920#--------------------
1921# exported functions
1922#--------------------
def is_tarfile(name):
    """Return True if name can be opened as a tar archive by this
       module, and False if that attempt fails with a TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1933
# Module level shortcut for TarFile.open. Within this module the name
# shadows the builtin open() from here on, so is_tarfile() calls
# TarFile.open as well.
open = TarFile.open