blob: d20107ecd28c249a35994eef3555877957a9dfdc [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
71NUL = "\0" # the null character
72BLOCKSIZE = 512 # length of processing blocks
73RECORDSIZE = BLOCKSIZE * 20 # length of records
74MAGIC = "ustar" # magic tar string
75VERSION = "00" # version number
76
77LENGTH_NAME = 100 # maximum length of a filename
78LENGTH_LINK = 100 # maximum length of a linkname
79LENGTH_PREFIX = 155 # maximum length of the prefix field
80MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
81
82REGTYPE = "0" # regular file
83AREGTYPE = "\0" # regular file
84LNKTYPE = "1" # link (inside tarfile)
85SYMTYPE = "2" # symbolic link
86CHRTYPE = "3" # character special device
87BLKTYPE = "4" # block special device
88DIRTYPE = "5" # directory
89FIFOTYPE = "6" # fifo special device
90CONTTYPE = "7" # contiguous file
91
92GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
93GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
94GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
99SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
101 CONTTYPE, CHRTYPE, BLKTYPE,
102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
103 GNUTYPE_SPARSE)
104
105REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
111S_IFLNK = 0120000 # symbolic link
112S_IFREG = 0100000 # regular file
113S_IFBLK = 0060000 # block device
114S_IFDIR = 0040000 # directory
115S_IFCHR = 0020000 # character device
116S_IFIFO = 0010000 # fifo
117
118TSUID = 04000 # set UID on execution
119TSGID = 02000 # set GID on execution
120TSVTX = 01000 # reserved
121
122TUREAD = 0400 # read by owner
123TUWRITE = 0200 # write by owner
124TUEXEC = 0100 # execute/search by owner
125TGREAD = 0040 # read by group
126TGWRITE = 0020 # write by group
127TGEXEC = 0010 # execute/search by group
128TOREAD = 0004 # read by other
129TOWRITE = 0002 # write by other
130TOEXEC = 0001 # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    Everything from the first NUL character onwards is discarded; a
    buffer that contains no NUL at all is returned unchanged.
    """
    # A null-terminated field ends at its *first* NUL.  Stripping only
    # the trailing NULs would keep embedded NULs and any bytes that
    # follow the terminator as part of the value.
    return s.split("\0", 1)[0]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Return the checksum of a member's header block.

    The checksum is the plain sum of all byte values of buf, where the
    eight bytes of the chksum field itself (offsets 148 to 155) are
    counted as if they were blanks.  buf is the string buffer holding
    the header.
    """
    # 256 == 8 * ord(" ") stands in for the blanked-out chksum field.
    before_field = sum(map(ord, buf[:148]))
    after_field = sum(map(ord, buf[156:]))
    return 256 + before_field + after_field
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length set to None the whole remaining content of src is
    copied.  Otherwise exactly length bytes are transferred and an
    IOError is raised if src runs out of data before that.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means src is exhausted.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    full_chunks, tail = divmod(length, BUFSIZE)
    copied = 0
    while copied < full_chunks:
        transfer(BUFSIZE)
        copied += 1
    if tail != 0:
        transfer(tail)
    return
175
176filemode_table = (
177 (S_IFLNK, "l",
178 S_IFREG, "-",
179 S_IFBLK, "b",
180 S_IFDIR, "d",
181 S_IFCHR, "c",
182 S_IFIFO, "p"),
183 (TUREAD, "r"),
184 (TUWRITE, "w"),
185 (TUEXEC, "x", TSUID, "S", TUEXEC|TSUID, "s"),
186 (TGREAD, "r"),
187 (TGWRITE, "w"),
188 (TGEXEC, "x", TSGID, "S", TGEXEC|TSGID, "s"),
189 (TOREAD, "r"),
190 (TOWRITE, "w"),
191 (TOEXEC, "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
192
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for row in filemode_table:
        # Each row is a flat (bits, char, bits, char, ...) sequence that
        # describes one output column; the first entry whose bits are
        # all set in mode decides the character, "-" is the fallback.
        symbol = "-"
        index = 0
        while index < len(row):
            bits = row[index]
            if mode & bits == bits:
                symbol = row[index + 1]
                break
            index += 2
        chars.append(symbol)
    return "".join(chars)
210
def normpath(path):
    """Normalize path like os.path.normpath() does, but always use
       forward slashes as separators, which is what tar member names
       are written with.
    """
    path = os.path.normpath(path)
    if os.sep != "/":
        path = path.replace(os.sep, "/")
    return path
215
class TarError(Exception):
    """Base class of all exceptions defined by this module."""


class ExtractError(TarError):
    """General exception for errors during extraction."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
259class _Stream:
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
266
267 _Stream is intended to be used only internally.
268 """
269
270 def __init__(self, name, mode, type, fileobj, bufsize):
271 """Construct a _Stream object.
272 """
273 self._extfileobj = True
274 if fileobj is None:
275 fileobj = _LowLevelFile(name, mode)
276 self._extfileobj = False
277
278 self.name = name or ""
279 self.mode = mode
280 self.type = type
281 self.fileobj = fileobj
282 self.bufsize = bufsize
283 self.buf = ""
284 self.pos = 0L
285 self.closed = False
286
287 if type == "gz":
288 try:
289 import zlib
290 except ImportError:
291 raise CompressionError, "zlib module is not available"
292 self.zlib = zlib
293 self.crc = zlib.crc32("")
294 if mode == "r":
295 self._init_read_gz()
296 else:
297 self._init_write_gz()
298
299 if type == "bz2":
300 try:
301 import bz2
302 except ImportError:
303 raise CompressionError, "bz2 module is not available"
304 if mode == "r":
305 self.dbuf = ""
306 self.cmp = bz2.BZ2Decompressor()
307 else:
308 self.cmp = bz2.BZ2Compressor()
309
310 def __del__(self):
311 if not self.closed:
312 self.close()
313
314 def _init_write_gz(self):
315 """Initialize for writing with gzip compression.
316 """
317 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
318 -self.zlib.MAX_WBITS,
319 self.zlib.DEF_MEM_LEVEL,
320 0)
321 timestamp = struct.pack("<L", long(time.time()))
322 self.__write("\037\213\010\010%s\002\377" % timestamp)
323 if self.name.endswith(".gz"):
324 self.name = self.name[:-3]
325 self.__write(self.name + NUL)
326
327 def write(self, s):
328 """Write string s to the stream.
329 """
330 if self.type == "gz":
331 self.crc = self.zlib.crc32(s, self.crc)
332 self.pos += len(s)
333 if self.type != "tar":
334 s = self.cmp.compress(s)
335 self.__write(s)
336
337 def __write(self, s):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
340 """
341 self.buf += s
342 while len(self.buf) > self.bufsize:
343 self.fileobj.write(self.buf[:self.bufsize])
344 self.buf = self.buf[self.bufsize:]
345
346 def close(self):
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
349 """
350 if self.closed:
351 return
352
353 if self.mode == "w" and self.buf:
354 if self.type != "tar":
355 self.buf += self.cmp.flush()
356 self.fileobj.write(self.buf)
357 self.buf = ""
358 if self.type == "gz":
359 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000360 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000361
362 if not self._extfileobj:
363 self.fileobj.close()
364
365 self.closed = True
366
367 def _init_read_gz(self):
368 """Initialize for reading a gzip compressed fileobj.
369 """
370 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
371 self.dbuf = ""
372
373 # taken from gzip.GzipFile with some alterations
374 if self.__read(2) != "\037\213":
375 raise ReadError, "not a gzip file"
376 if self.__read(1) != "\010":
377 raise CompressionError, "unsupported compression method"
378
379 flag = ord(self.__read(1))
380 self.__read(6)
381
382 if flag & 4:
383 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
384 self.read(xlen)
385 if flag & 8:
386 while True:
387 s = self.__read(1)
388 if not s or s == NUL:
389 break
390 if flag & 16:
391 while True:
392 s = self.__read(1)
393 if not s or s == NUL:
394 break
395 if flag & 2:
396 self.__read(2)
397
398 def tell(self):
399 """Return the stream's file pointer position.
400 """
401 return self.pos
402
403 def seek(self, pos=0):
404 """Set the stream's file pointer to pos. Negative seeking
405 is forbidden.
406 """
407 if pos - self.pos >= 0:
408 blocks, remainder = divmod(pos - self.pos, self.bufsize)
409 for i in xrange(blocks):
410 self.read(self.bufsize)
411 self.read(remainder)
412 else:
413 raise StreamError, "seeking backwards is not allowed"
414 return self.pos
415
416 def read(self, size=None):
417 """Return the next size number of bytes from the stream.
418 If size is not defined, return all bytes of the stream
419 up to EOF.
420 """
421 if size is None:
422 t = []
423 while True:
424 buf = self._read(self.bufsize)
425 if not buf:
426 break
427 t.append(buf)
428 buf = "".join(t)
429 else:
430 buf = self._read(size)
431 self.pos += len(buf)
432 return buf
433
434 def _read(self, size):
435 """Return size bytes from the stream.
436 """
437 if self.type == "tar":
438 return self.__read(size)
439
440 c = len(self.dbuf)
441 t = [self.dbuf]
442 while c < size:
443 buf = self.__read(self.bufsize)
444 if not buf:
445 break
446 buf = self.cmp.decompress(buf)
447 t.append(buf)
448 c += len(buf)
449 t = "".join(t)
450 self.dbuf = t[size:]
451 return t[:size]
452
453 def __read(self, size):
454 """Return size bytes from stream. If internal buffer is empty,
455 read another block from the stream.
456 """
457 c = len(self.buf)
458 t = [self.buf]
459 while c < size:
460 buf = self.fileobj.read(self.bufsize)
461 if not buf:
462 break
463 t.append(buf)
464 c += len(buf)
465 t = "".join(t)
466 self.buf = t[size:]
467 return t[:size]
468# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a read-only file object for the member described
           by tarinfo, which lives in the archive opened as tarfile.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        # read() is bound per instance to the matching implementation.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read one line and return it including its newline.  If
           size is not negative, at most size characters are taken
           from the member, so a line that is longer comes back in
           several pieces of which only the last one carries the
           newline.  An empty string signals the end of the file.
           readline() and read() must not be mixed up (!).
        """
        unlimited = size < 0

        nl = self.linebuffer.find("\n")
        # Refill the line buffer in small chunks until it holds a
        # newline, reached the size limit or the member is exhausted.
        while nl < 0 and (unlimited or len(self.linebuffer) < size):
            want = 100
            if not unlimited:
                want = min(want, size - len(self.linebuffer))
            buf = self.read(want)
            if not buf:
                break
            self.linebuffer += buf
            nl = self.linebuffer.find("\n")

        if nl < 0 or (not unlimited and nl >= size):
            # There is no newline within reach: hand out what is
            # allowed and keep the rest, without dropping a character
            # or making up a newline that is not in the data.
            if unlimited:
                cut = len(self.linebuffer)
            else:
                cut = size
            s = self.linebuffer[:cut]
            self.linebuffer = self.linebuffer[cut:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        # Carriage returns in front of the newline are removed, i.e.
        # "\r\n" line ends are returned as "\n".
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.  Returns at most size
           bytes, or everything up to the end of the member if size
           is None or negative.  Raises ValueError on a closed file.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        # A negative size means "all that is left", as it does for
        # regular file objects.  It must never reach the arithmetic
        # below, where it would move the position backwards and make
        # the read run beyond the end of the member.
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.  Returns at most size
           bytes, or everything up to the end of the member if size
           is None or negative.  Raises ValueError on a closed file.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # A data section is stored in the archive at realpos.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Every other section is a hole and reads as NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.  whence is 0 (from the
           start), 1 (relative to the current position) or 2 (from
           the end).  The resulting position is always kept within
           the bounds of the member.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        elif whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        elif whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
614#class ExFileObject
615
616#------------------
617# Exported Classes
618#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name     = name       # member name (dirnames must end with '/')
        self.mode     = (TUREAD | TUWRITE | TGREAD |   # file permissions,
                         TGWRITE | TOREAD | TOWRITE)   # rw-rw-rw- by default
        self.uid      = 0          # user id
        self.gid      = 0          # group id
        self.size     = 0          # file size
        self.mtime    = 0          # modification time
        self.chksum   = 0          # header checksum
        self.type     = REGTYPE    # member type
        self.linkname = ""         # link name
        self.uname    = "user"     # user name
        self.gname    = "group"    # group name
        self.devmajor = 0          #-
        self.devminor = 0          #-for use with CHRTYPE and BLKTYPE
        self.prefix   = ""         # prefix to filename or holding information
                                   # about sparse files

        self.offset   = 0          # the tar header starts here
        self.offset_data = 0       # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def _octal(field):
        """Convert a numeric header field (octal digits that may be
           followed by a NUL and arbitrary padding) to an integer.
           Raises ValueError if the field holds no valid number,
           which includes fields that are empty or NUL-only.
        """
        # Only the part in front of the first NUL is the number.  It
        # is cut off here explicitly because int() is not guaranteed
        # to accept a string with an embedded NUL.
        return int(field.split(NUL, 1)[0], 8)

    _octal = staticmethod(_octal)

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.
           Raises ValueError if one of the mandatory numeric fields
           cannot be converted.
        """
        tarinfo = cls()
        tarinfo.name   = nts(buf[0:100])
        tarinfo.mode   = cls._octal(buf[100:108])
        tarinfo.uid    = cls._octal(buf[108:116])
        tarinfo.gid    = cls._octal(buf[116:124])
        tarinfo.size   = cls._octal(buf[124:136])
        tarinfo.mtime  = cls._octal(buf[136:148])
        tarinfo.chksum = cls._octal(buf[148:156])
        tarinfo.type   = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname  = nts(buf[265:297])
        tarinfo.gname  = nts(buf[297:329])
        try:
            tarinfo.devmajor = cls._octal(buf[329:337])
            tarinfo.devminor = cls._octal(buf[337:345])
        except ValueError:
            # The device fields are optional; if either of them is
            # unusable, both numbers are reset.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        # (For GNU sparse members the field holds sparse information
        # instead and must not be joined with the name.)
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.
           The block is stored in self.buf as well.
        """
        name = self.name

        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (name, 100),
                ("%07o" % (self.mode & 0xFFF), 8),  # permission bits (07777)
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                ("%011o" % self.size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),                    # chksum, blank for now
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # Cut the value to the width of its field or pad it with
            # NULs (a negative pad count simply adds nothing).
            l = len(value)
            parts.append(value[:fieldsize] + (fieldsize - l) * NUL)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # Six octal digits and a NUL replace the first seven bytes of
        # the chksum field; its last blank stays in place.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
744# class TarInfo
745
746class TarFile(object):
747 """The TarFile Class provides an interface to tar archives.
748 """
749
750 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
751
752 dereference = False # If true, add content of linked file to the
753 # tar file, else the link.
754
755 ignore_zeros = False # If true, skips empty or invalid blocks and
756 # continues processing.
757
758 errorlevel = 0 # If 0, fatal errors only appear in debug
759 # messages (if debug >= 0). If > 0, errors
760 # are passed to the caller as exceptions.
761
762 posix = True # If True, generates POSIX.1-1990-compliant
763 # archives (no GNU extensions!)
764
765 fileobject = ExFileObject
766
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.
       Raises ValueError for any other `mode'.
    """
    self.name = name

    # The mode is compared with the three valid values as a whole;
    # a substring test on "raw" would let the empty string pass.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(self.name, self.mode)
        self._extfileobj = False
    else:
        if self.name is None and hasattr(fileobj, "name"):
            self.name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.fileobj = fileobj

    # Init datastructures
    self.closed      = False
    self.members     = []       # list of members as TarInfo objects
    self.membernames = []       # names of members
    self._loaded     = False    # flag if all members have been read
    self.offset      = 0        # current position in the archive file
    self.inodes      = {}       # dictionary caching the inodes of
                                # archive members already added

    try:
        if self._mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self._mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                try:
                    tarinfo = self.next()
                except ReadError:
                    self.fileobj.seek(0)
                    break
                if tarinfo is None:
                    self.fileobj.seek(- BLOCKSIZE, 1)
                    break
    except:
        # The archive turned out to be unusable.  A file that was
        # opened here would stay open until it is garbage collected,
        # because the caller never gets an object to call close() on.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise

    if self._mode in ("a", "w"):
        self._loaded = True
823
824 #--------------------------------------------------------------------------
825 # Below are the classmethods which act as alternate constructors to the
826 # TarFile class. The open() method is the only one that is needed for
827 # public use; it is the "super"-constructor and is able to select an
828 # adequate "sub"-constructor for a particular compression using the mapping
829 # from OPEN_METH.
830 #
831 # This concept allows one to subclass TarFile without losing the comfort of
832 # the super-constructor. A sub-constructor is registered and made available
833 # by adding it to the mapping in OPEN_METH.
834
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r'          open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or if
       mode is not one of the above, CompressionError for an unknown
       compression type and ReadError if no method is able to read
       the archive in mode 'r'.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compared as a whole: a substring test would accept "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    elif mode == "r":
        # Find out which *open() is appropriate for opening the file.
        # A failed attempt has already consumed data from fileobj, so
        # it is put back to where it was before the next method gets
        # its turn; otherwise that one would start in the middle of
        # the data and fail as well.
        rewind = (fileobj is not None and hasattr(fileobj, "tell")
                  and hasattr(fileobj, "seek"))
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if rewind:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if rewind:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
899
900 open = classmethod(open)
901
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.
       mode is 'r', 'a' or 'w'; anything else raises ValueError.
    """
    # Compared as a whole: a substring test on "raw" would let the
    # empty string pass.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
908
909 taropen = classmethod(taropen)
910
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.
       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is not usable and
       ReadError if the data is not gzip compressed.
    """
    # Compared as a whole: a substring test on "rw" would let the
    # empty string pass.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the tar archive inside the gzip file.  There
    # is nothing to derive it from if only a fileobj was passed.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    own_fileobj = fileobj is None
    if own_fileobj:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if own_fileobj:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Whatever went wrong, do not leave the file open that was
        # opened above; a fileobj of the caller is left alone.
        if own_fileobj:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
946
947 gzopen = classmethod(gzopen)
948
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.
       Raises ValueError for a mode other than 'r' or 'w' or if a
       fileobj is passed, CompressionError if the bz2 module is not
       available and ReadError if the file is not bzip2 compressed.
    """
    # Compared as a whole: a substring test on "rw" would let the
    # empty string pass.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # This has to be checked before name is looked at: if a fileobj is
    # passed, name may be None and could not be split below.
    if fileobj is not None:
        raise ValueError("no support for external file objects")

    pre, ext = os.path.splitext(name)
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    bzfile = None
    try:
        bzfile = bz2.BZ2File(name, mode, compresslevel=compresslevel)
        t = cls.taropen(tarname, mode, bzfile)
    except IOError:
        if bzfile is not None:
            bzfile.close()
        raise ReadError("not a bzip2 file")
    except:
        # Do not leave the compressed file open if the archive
        # inside of it is unusable.
        if bzfile is not None:
            bzfile.close()
        raise
    t._extfileobj = False
    return t
978
979 bz2open = classmethod(bz2open)
980
981 # All *open() methods are registered here.
982 OPEN_METH = {
983 "tar": "taropen", # uncompressed tar
984 "gz": "gzopen", # gzip compressed tar
985 "bz2": "bz2open" # bzip2 compressed tar
986 }
987
988 #--------------------------------------------------------------------------
989 # The public methods which TarFile provides:
990
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.  Closing a TarFile that is closed
       already does nothing.
    """
    if not self.closed:
        if self._mode in "aw":
            # end-of-archive marker
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # Pad the archive with zeros up to a full record
            # (like option -b20 for tar does).
            overhang = self.offset % RECORDSIZE
            if overhang > 0:
                self.fileobj.write(NUL * (RECORDSIZE - overhang))

        # A file object handed in by the caller stays open.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1010
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    self._check()
    found = name in self.membernames
    if not found and not self._loaded:
        # The name may belong to a member that was not read yet.
        self._load()
        found = name in self.membernames
    if not found:
        raise KeyError("filename %r not found" % name)
    return self._getmember(name)
1023
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The list is complete only after the whole archive was scanned.
    self._load()
    return self.members
1033
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    self._check()
    if self._loaded:
        return self.membernames
    # All names are known only after the whole archive was scanned.
    self._load()
    return self.membernames
1042
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object that describes an existing file.

       The file is given either by its path `name' or by the file
       object `fileobj'. In the latter case fileobj.name replaces
       `name' and os.fstat() on the object's file descriptor supplies
       the metadata. `arcname', if given, is an alternative name for
       the file in the archive. The returned object may be modified
       before it is handed to addfile(). None is returned for file
       types that are not handled here.
    """
    self._check("aw")

    # A file object overrides `name' with its own name.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalize the path, drop a drive
    # specification and strip leading slashes so that the stored
    # path is relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    arcname = arcname.lstrip("/")

    tarinfo = TarInfo()

    # Stat the file. Symbolic links are followed only if the platform
    # has no lstat() or if dereferencing was requested.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    target = ""
    mode_bits = statres.st_mode
    if stat.S_ISREG(mode_bits):
        key = (statres.st_ino, statres.st_dev)
        if key in self.inodes and not self.dereference:
            # The same inode was archived before: store a hardlink
            # to the earlier member.
            typeflag = LNKTYPE
            target = self.inodes[key]
        else:
            typeflag = REGTYPE
            # Remember the inode only if it's valid
            # (for win32 it is always 0).
            if key[0]:
                self.inodes[key] = arcname
    elif stat.S_ISDIR(mode_bits):
        typeflag = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(mode_bits):
        typeflag = FIFOTYPE
    elif stat.S_ISLNK(mode_bits):
        typeflag = SYMTYPE
        target = os.readlink(name)
    elif stat.S_ISCHR(mode_bits):
        typeflag = CHRTYPE
    elif stat.S_ISBLK(mode_bits):
        typeflag = BLKTYPE
    else:
        return None

    # Copy everything the stat result offers into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = mode_bits
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = typeflag
    tarinfo.linkname = target

    # Symbolic owner names are optional: they are filled in only if
    # the pwd/grp modules are available and know the numeric ids.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if typeflag in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1138
def list(self, verbose=True):
    """Print a table of contents to sys.stdout, one line per member.

       If `verbose' is False, only the names of the members are
       printed. If it is True, an `ls -l'-like output is produced:
       mode string, owner/group, size (or major,minor for device
       members), modification time, name and, for links, the link
       target.
    """
    self._check()

    # Iterating over self yields the members one after another.
    for tarinfo in self:
        if verbose:
            print filemode(tarinfo.mode),
            # Symbolic owner names are preferred; the numeric ids are
            # used when the names are empty.
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            if tarinfo.ischr() or tarinfo.isblk():
                # Device members show major,minor in the size column.
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # The first six fields of the time tuple are
            # year, month, day, hour, minute and second.
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the output line of this member.
        print
1167
1168 def add(self, name, arcname=None, recursive=True):
1169 """Add the file `name' to the archive. `name' may be any type of file
1170 (directory, fifo, symbolic link, etc.). If given, `arcname'
1171 specifies an alternative name for the file in the archive.
1172 Directories are added recursively by default. This can be avoided by
1173 setting `recursive' to False.
1174 """
1175 self._check("aw")
1176
1177 if arcname is None:
1178 arcname = name
1179
1180 # Skip if somebody tries to archive the archive...
1181 if self.name is not None \
1182 and os.path.abspath(name) == os.path.abspath(self.name):
1183 self._dbg(2, "tarfile: Skipped %r" % name)
1184 return
1185
1186 # Special case: The user wants to add the current
1187 # working directory.
1188 if name == ".":
1189 if recursive:
1190 if arcname == ".":
1191 arcname = ""
1192 for f in os.listdir("."):
1193 self.add(f, os.path.join(arcname, f))
1194 return
1195
1196 self._dbg(1, name)
1197
1198 # Create a TarInfo object from the file.
1199 tarinfo = self.gettarinfo(name, arcname)
1200
1201 if tarinfo is None:
1202 self._dbg(1, "tarfile: Unsupported type %r" % name)
1203 return
1204
1205 # Append the tar header and data to the archive.
1206 if tarinfo.isreg():
1207 f = file(name, "rb")
1208 self.addfile(tarinfo, f)
1209 f.close()
1210
1211 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1212 tarinfo.size = 0L
1213 self.addfile(tarinfo)
1214
1215 if tarinfo.isdir():
1216 self.addfile(tarinfo)
1217 if recursive:
1218 for f in os.listdir(name):
1219 self.add(os.path.join(name, f), os.path.join(arcname, f))
1220
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       A ValueError is raised if the member is larger than
       MAXSIZE_MEMBER, or if self.posix is set and the name or the
       linkname does not fit into the header. Note that `tarinfo'
       itself is modified: its name and linkname are normalized and
       possibly shortened.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError, "file is too large (>8GB)"

    # A linkname that is too long is an error in posix mode. Otherwise
    # the full linkname is written as a GNU longlink member in front of
    # the header, which keeps a truncated copy.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError, "linkname is too long (>%d)" \
                              % (LENGTH_LINK)
        else:
            self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
            tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at a slash into prefix and name: take the
            # first LENGTH_PREFIX + 1 characters and cut them back to
            # the last slash they contain.
            prefix = tarinfo.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            # The name is what follows that slash; the slash itself is
            # stored in neither part.
            name = tarinfo.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError, "name is too long (>%d)" \
                                  % (LENGTH_NAME)

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            # Same scheme as for long linknames above.
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header block.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Pad the data with NULs up to a multiple of BLOCKSIZE.
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    # NOTE(review): after a name was shortened above, the member is
    # recorded under the shortened name -- confirm this is intended.
    self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001283
def extract(self, member, path=""):
    """Extract one member of the archive to disk.

       `member' may be a filename or a TarInfo object. The member is
       written below `path' (by default relative to the current working
       directory) using its full name, and its file information is
       restored as accurately as possible.

       Errors are handled according to self.errorlevel: an
       EnvironmentError is re-raised if the level is above 0, an
       ExtractError if it is above 1. Otherwise the error is only
       written to the debug output.
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        # Fetch the exception that is being handled.
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1312
def extractfile(self, member):
    """Return a file-like object for the data of a member.

       `member' may be a filename or a TarInfo object. For a regular
       file, and for a member of an unknown type, a file-like object on
       its data is returned. For a (symbolic) link the file-like object
       of the link's target is returned. For all other members None is
       returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files carry data. A member of an unknown type is
    # treated as a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), there is no file object to return.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # The link's target would have to be looked up earlier in the
        # archive, which is not possible on a non-seekable stream of
        # tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object. The
    # target is searched among the members in front of the link.
    linked = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(linked)
1351
1352 def _extract_member(self, tarinfo, targetpath):
1353 """Extract the TarInfo object tarinfo to a physical
1354 file called targetpath.
1355 """
1356 # Fetch the TarInfo object for the given name
1357 # and build the destination pathname, replacing
1358 # forward slashes to platform specific separators.
1359 if targetpath[-1:] == "/":
1360 targetpath = targetpath[:-1]
1361 targetpath = os.path.normpath(targetpath)
1362
1363 # Create all upper directories.
1364 upperdirs = os.path.dirname(targetpath)
1365 if upperdirs and not os.path.exists(upperdirs):
1366 ti = TarInfo()
1367 ti.name = upperdirs
1368 ti.type = DIRTYPE
1369 ti.mode = 0777
1370 ti.mtime = tarinfo.mtime
1371 ti.uid = tarinfo.uid
1372 ti.gid = tarinfo.gid
1373 ti.uname = tarinfo.uname
1374 ti.gname = tarinfo.gname
1375 try:
1376 self._extract_member(ti, ti.name)
1377 except:
1378 pass
1379
1380 if tarinfo.islnk() or tarinfo.issym():
1381 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1382 else:
1383 self._dbg(1, tarinfo.name)
1384
1385 if tarinfo.isreg():
1386 self.makefile(tarinfo, targetpath)
1387 elif tarinfo.isdir():
1388 self.makedir(tarinfo, targetpath)
1389 elif tarinfo.isfifo():
1390 self.makefifo(tarinfo, targetpath)
1391 elif tarinfo.ischr() or tarinfo.isblk():
1392 self.makedev(tarinfo, targetpath)
1393 elif tarinfo.islnk() or tarinfo.issym():
1394 self.makelink(tarinfo, targetpath)
1395 elif tarinfo.type not in SUPPORTED_TYPES:
1396 self.makeunknown(tarinfo, targetpath)
1397 else:
1398 self.makefile(tarinfo, targetpath)
1399
1400 self.chown(tarinfo, targetpath)
1401 if not tarinfo.issym():
1402 self.chmod(tarinfo, targetpath)
1403 self.utime(tarinfo, targetpath)
1404
1405 #--------------------------------------------------------------------------
1406 # Below are the different file methods. They are called via
1407 # _extract_member() when extract() is called. They can be replaced in a
1408 # subclass to implement other functionality.
1409
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that exists
       already is not an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only "already exists" is tolerated, everything else is
        # passed on to the caller.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1418
def makefile(self, tarinfo, targetpath):
    """Make a file called `targetpath' from the data of the member
       `tarinfo'.

       Both the member's file object and the target file are closed
       again, even if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1427
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of an unknown type to `targetpath' as if it
       were a regular file and mention that in the debug output.
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file." % tarinfo.type)
    self._dbg(1, note)
1435
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'. An ExtractError is raised
       if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1443
def makedev(self, tarinfo, targetpath):
    """Create a character or block device called `targetpath'. An
       ExtractError is raised if the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Add the file type bit for the kind of device to the
    # member's mode.
    mode = tarinfo.mode
    if tarinfo.isblk():
        mode = mode | stat.S_IFBLK
    else:
        mode = mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1458
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called `targetpath'.

       If the link cannot be created because of a platform limitation
       (the os module has no symlink()/link()), a copy of the
       referenced file is made instead: first by extracting the
       referenced member from the archive, then by copying the file
       from the filesystem. An IOError is raised if that fails as well.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            os.link(source, targetpath)
        return
    except AttributeError:
        # No link support, fall back to a copy below.
        pass

    if tarinfo.issym():
        # The target of a symbolic link is resolved against the
        # directory of the link member.
        source = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                       source))

    try:
        self._extract_member(self.getmember(source), targetpath)
    except (EnvironmentError, KeyError):
        # The archive could not provide the file, try the filesystem.
        local_source = os.path.normpath(source)
        try:
            shutil.copy2(local_source, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1484
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. An ExtractError is raised if the
       ownership cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Group: the symbolic name is tried first, then the numeric id,
    # then the group of the current process.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # User: same order of preference.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1512
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in `tarinfo' to `targetpath'.
       Nothing is done if the os module has no chmod(). A failure is
       reported as an ExtractError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001521
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to the
       mtime stored in `tarinfo'. A failure is reported as an
       ExtractError.
    """
    if hasattr(os, 'utime'):
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories, so these are left alone
        # on win32.
        if sys.platform != "win32" or not tarinfo.isdir():
            try:
                os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
            except EnvironmentError:
                raise ExtractError("could not change modification time")
1535
1536 #--------------------------------------------------------------------------
1537
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       A ReadError is raised if the very first block of the archive
       cannot be parsed as a header. Members whose typeflag is
       registered in TYPE_METH are handed to the registered method,
       and its result is returned.
    """
    self._check("ra")
    if self.firstmember is not None:
        # A member that was read ahead is delivered first.
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the block and keep looking for a header.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and store a directory
    # as a regular file whose name ends with a slash. The last
    # character of the name has to be tested for that.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self._record_member(tarinfo)
    return tarinfo
1604
1605 #--------------------------------------------------------------------------
1606 # Below are some methods which are called for special typeflags in the
1607 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1608 # are registered in TYPE_METH below. You can register your own methods
1609 # with this mapping.
1610 # A registered method is called with a TarInfo object as only argument.
1611 #
1612 # During its execution the method MUST perform the following tasks:
1613 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1614 # if there is data to follow.
1615 # 2. set self.offset to the position where the next member's header will
1616 # begin.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001617 # 3. call self._record_member() if the tarinfo object is supposed to
1618 # appear as a member of the TarFile object.
1619 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001620
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member.

       `tarinfo' is the header of the longname/longlink member. Its
       data blocks hold the name as a null terminated string that
       belongs to the member following in the archive. That member is
       read, gets the long name (or linkname) and the offset of
       `tarinfo' assigned, and is returned. None is returned if the
       archive ends before the following header.
    """
    # Collect the data blocks that carry the long name.
    blocks = []
    count = tarinfo.size
    while count > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(blocks)

    # Fetch the next header
    member = self.next()
    if member is None:
        # The archive ends right after the name blocks, so there
        # is no member the name could be assigned to.
        return None

    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
        # next() has recorded the member already, under the
        # truncated name from its header. Keep the list of names
        # in step with the name the member carries now.
        if self.members and self.members[-1] is member:
            self.membernames[-1] = member.name
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001645
def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

       The header is scanned for entries of two 12 character octal
       numbers (offset and number of bytes). From these a list of
       _data and _hole sections is built and stored as tarinfo.sparse.
       tarinfo.size is replaced by the original file size read from
       the header. The member is recorded and returned.
    """
    buf = tarinfo.tobuf()
    sp = _ringbuffer()
    pos = 386       # position of the first entry in the header block
    lastpos = 0L    # end of the previous section in the original file
    realpos = 0L    # running total of data bytes seen so far
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            # Not a number: no more entries in this block.
            break
        if offset > lastpos:
            # The gap between two data sections is a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # Up to 21 entries are read from an extra header; its
        # continuation flag is the byte at position 504.
        for i in xrange(21):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # Whatever remains up to the original size is a trailing hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # The size from the header is used to skip the data blocks in
    # the archive, before it is replaced by the original size.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    self._record_member(tarinfo)
    return tarinfo
1703
# The type mapping for the next() method. The keys are single character
# strings, the typeflag. The values are methods which are called when
# next() encounters such a typeflag.
# next() calls the entry as method(self, tarinfo) and returns its result
# in place of the member it has just read.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE: proc_sparse
}
1712
1713 #--------------------------------------------------------------------------
1714 # Little helper methods:
1715
def _block(self, count):
    """Return the byte count `count' rounded up to the next multiple
       of BLOCKSIZE, e.g. _block(834) => 1024.
    """
    remainder = count % BLOCKSIZE
    if not remainder:
        # Already a whole number of blocks.
        return count
    return count + (BLOCKSIZE - remainder)
1724
1725 def _getmember(self, name, tarinfo=None):
1726 """Find an archive member by name from bottom to top.
1727 If tarinfo is given, it is used as the starting point.
1728 """
1729 if tarinfo is None:
1730 end = len(self.members)
1731 else:
1732 end = self.members.index(tarinfo)
1733
1734 for i in xrange(end - 1, -1, -1):
1735 if name == self.membernames[i]:
1736 return self.members[i]
1737
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001738 def _record_member(self, tarinfo):
1739 """Record a tarinfo object in the internal datastructures.
1740 """
1741 self.members.append(tarinfo)
1742 self.membernames.append(tarinfo.name)
1743
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001744 def _load(self):
1745 """Read through the entire archive file and look for readable
1746 members.
1747 """
1748 while True:
1749 tarinfo = self.next()
1750 if tarinfo is None:
1751 break
1752 self._loaded = True
1753
1754 def _check(self, mode=None):
1755 """Check if TarFile is still open, and if the operation's mode
1756 corresponds to TarFile's mode.
1757 """
1758 if self.closed:
1759 raise IOError, "%s is closed" % self.__class__.__name__
1760 if mode is not None and self._mode not in mode:
1761 raise IOError, "bad operation for mode %r" % self._mode
1762
1763 def __iter__(self):
1764 """Provide an iterator object.
1765 """
1766 if self._loaded:
1767 return iter(self.members)
1768 else:
1769 return TarIter(self)
1770
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

       The member consists of a header with the typeflag `type' and
       the name "././@LongLink", whose size is the length of the null
       terminated `name', followed by the data blocks that hold
       `name' and its terminating NUL.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks, padded with NULs to a full block
    self.fileobj.write(payload)
    blocks, remainder = divmod(header.size, BLOCKSIZE)
    if remainder > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks = blocks + 1
    self.offset += blocks * BLOCKSIZE
1795
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr, unless `level' is higher than the
       debug level of the TarFile.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1801# class TarFile
1802
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the TarFile `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator object, i.e. the TarIter itself.
        """
        return self

    def next(self):
        """Return the next member as delivered by TarFile's next()
           method. When there are no more members, mark the TarFile
           as _loaded and end the iteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1827
1828# Helper classes for sparse file support
1829class _section:
1830 """Base class for _data and _hole.
1831 """
1832 def __init__(self, offset, size):
1833 self.offset = offset
1834 self.size = size
1835 def __contains__(self, offset):
1836 return self.offset <= offset < self.offset + self.size
1837
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it carries the value `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1844
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section.
    """
1849
1850class _ringbuffer(list):
1851 """Ringbuffer class which increases performance
1852 over a regular list.
1853 """
1854 def __init__(self):
1855 self.idx = 0
1856 def find(self, offset):
1857 idx = self.idx
1858 while True:
1859 item = self[idx]
1860 if offset in item:
1861 break
1862 idx += 1
1863 if idx == len(self):
1864 idx = 0
1865 if idx == self.idx:
1866 # End of File
1867 return None
1868 self.idx = idx
1869 return item
1870
1871#---------------------------------------------
1872# zipfile compatible TarFile class
1873#---------------------------------------------
# Values for the `compression' argument of TarFileCompat. The comments
# name the zipfile constant each of them corresponds to.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around a TarFile that offers the methods of standard
       module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Pick the constructor that matches the compression constant.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            # Give every member the attributes of a ZipInfo object.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [info.name for info in self.infolist()]

    def infolist(self):
        # Only members of a regular type are listed.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # `compress_type' is accepted but not used.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the ZipInfo-style attributes to the names
        # addfile() expects.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
1918#class TarFileCompat
1919
1920#--------------------
1921# exported functions
1922#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened (and closed again) as a
       tar archive without a TarError, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1933
# Make TarFile.open available as the module-level name open. This
# rebinds the name within this module, so the open() call in
# is_tarfile() resolves to TarFile.open as well.
open = TarFile.open