blob: 077fbee70995055750d01e3ba986bc9ce39e15d6 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
# Fixed values and field widths of the tar header format used by this module.
NUL        = "\0"               # the null character (field padding)
BLOCKSIZE  = 512                # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks = 10240 bytes)
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

LENGTH_NAME    = 100            # maximum length of a filename
LENGTH_LINK    = 100            # maximum length of a linkname
LENGTH_PREFIX  = 155            # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# One-character values of the header's type field.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
# Groupings of the type-field values defined above.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# Used by TarInfo.isreg(): members of these types count as regular files.
REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
# File type bits; filemode_table maps them to the first character of the
# mode string produced by filemode().
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special bits; filemode_table pairs them with the matching execute bit.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Permission bits for owner (U), group (G) and others (O).
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Return the string buffer `s' without its trailing NUL padding.
       Only NUL characters at the very end of the buffer are removed.
    """
    unpadded = s.rstrip(NUL)
    return unpadded
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Return the checksum of a member's header block.
       buf is the 512 byte string buffer holding the header. All bytes
       are added up, except that the 8 bytes of the chksum field itself
       (offsets 148 to 155) are counted as if they were spaces.
    """
    blanks = 8 * ord(" ")                           # stand-in for the chksum field
    before = sum([ord(ch) for ch in buf[:148]])     # bytes before chksum
    after = sum([ord(ch) for ch in buf[156:]])      # bytes after chksum
    return blanks + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy exactly length bytes from fileobj src to fileobj dst.
       If length is None, everything up to EOF is copied. IOError is
       raised if src ends before length bytes could be read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move one chunk; a short read means src ended too early.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    BUFSIZE = 16 * 1024
    whole, rest = divmod(length, BUFSIZE)
    done = 0
    while done < whole:
        transfer(BUFSIZE)
        done += 1

    if rest != 0:
        transfer(rest)
    return
175
# Lookup table for filemode(). Every row yields one character of the
# "-rwxrwxrwx" string. A row is a flat sequence of (bits, char) pairs which
# filemode() tries from left to right, taking the first pair whose bits are
# all set in the mode. A combined mask therefore has to come before the
# single bits it contains; otherwise the plain "x" would always win and
# "s"/"t" could never be produced.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD,  "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD,  "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD,  "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))
192
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for row in filemode_table:
        # A row is a flat run of (bits, char) pairs; the first pair whose
        # bits are all present in mode decides the character.
        for i in range(0, len(row), 2):
            bits = row[i]
            if mode & bits == bits:
                chars.append(row[i + 1])
                break
        else:
            chars.append("-")
    return "".join(chars)
210
# normpath() normalizes a path like os.path.normpath() but always returns
# it with "/" as separator, whatever the local os.sep is.
normpath = os.path.normpath
if os.sep != "/":
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access. All operations go straight to os.open(), os.read(),
       os.write() and os.close() on a file descriptor.
    """

    def __init__(self, name, mode):
        """Open the file `name'. `mode' must be "r" (read only) or "w"
           (write only, creating or truncating the file); any other
           value raises KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            # Only some platforms define O_BINARY; use it where it exists.
            mode |= os.O_BINARY
        # Pass explicit permissions: os.open() otherwise creates new files
        # from 0777, i.e. archives would come out executable.
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to `size' bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string `s' to the file descriptor."""
        os.write(self.fd, s)
258
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, type, fileobj, bufsize):
        """Construct a _Stream object.

           name    -- file name; opened via _LowLevelFile if fileobj is None
           mode    -- "r" for reading, anything else is treated as writing
           type    -- "tar" (no compression), "gz" or "bz2"
           fileobj -- object to read from or write to; an object passed
                      in here is not closed by close()
           bufsize -- number of bytes per read/write on fileobj

           Raises CompressionError if the module needed for `type' is
           missing, and ReadError if a "gz" stream opened for reading
           does not start with a gzip header.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        self.name     = name or ""
        self.mode     = mode
        self.type     = type
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""
        self.pos      = 0
        self.closed   = False

        try:
            if type == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("")
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if type == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Setup failed: release the file we opened ourselves and mark
            # the stream closed so that __del__ leaves it alone.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # `closed' is missing if __init__ failed before setting it
        # (e.g. the file could not be opened); nothing to clean up then.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and trailer
        # are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        # magic, method (deflate), flags (FNAME), mtime, xfl, os
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.type == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.type != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Always drain the compressor first. It may still hold all of the
        # data while self.buf is empty (bz2 emits nothing until flush()
        # for small inputs), so this must not depend on self.buf.
        if self.mode == "w" and self.type != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.type == "gz":
                # gzip trailer: CRC32 and uncompressed size, both as
                # unsigned 32 bit little-endian values.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)                  # skip mtime, xfl and os

        if flag & 4:
            # Extra field: part of the (uncompressed) header, so it has to
            # be skipped with the raw __read(), not with read(), which
            # would decompress it and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)              # header crc

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden and raises StreamError. Seeking forward is
           done by reading and discarding data. Returns the new
           position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            while blocks > 0:
                self.read(self.bufsize)
                blocks -= 1
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.type == "tar":
            return self.__read(size)

        # Decompress raw blocks until enough data is available; what is
        # not handed out stays in self.dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up reading of the member described by `tarinfo' from
           the file object of `tarfile'. self.read is bound to the
           sparse or the normal read implementation, depending on
           tarinfo.issparse().
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).
           A line ending of "\\r\\n" is returned as "\\n". If the size
           limit is reached before a newline, at most size characters
           are returned without a newline; the rest stays buffered.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl < 0:
            # Read on in small chunks until a newline shows up, the
            # member ends or the size limit is used up.
            budget = size - len(self.linebuffer)
            while nl < 0 and budget > 0:
                buf = self.read(min(budget, 100))
                if not buf:
                    break
                self.linebuffer += buf
                budget -= len(buf)
                nl = self.linebuffer.find("\n")

        if nl < 0 or nl >= size:
            # No newline within the limit: hand out what fits and keep
            # the remainder, without dropping or inventing characters.
            s = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files. Returns at most size
           bytes, or everything up to the end of the member if size is
           None or negative. Raises ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        # A negative size must not reach min() below: it would move
        # self.pos backwards and make the underlying read() run past the
        # end of the member into the rest of the archive.
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files. Returns at most size
           bytes, or everything up to the end of the member if size is
           None or negative. Raises ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data section: translate the logical position into the
            # position inside the stored data.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is returned as a run of NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. whence is 0 (from the
           start), 1 (relative to the current position) or 2 (from the
           end). The resulting position is clamped to the range
           0..size and the line buffer is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
615
616#------------------
617# Exported Classes
618#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name     = name       # member name (dirnames must end with '/')
        self.mode     = 0o666      # file permissions
        self.uid      = 0          # user id
        self.gid      = 0          # group id
        self.size     = 0          # file size
        self.mtime    = 0          # modification time
        self.chksum   = 0          # header checksum
        self.type     = REGTYPE    # member type
        self.linkname = ""         # link name
        self.uname    = "user"     # user name
        self.gname    = "group"    # group name
        self.devmajor = 0          #-
        self.devminor = 0          #-for use with CHRTYPE and BLKTYPE
        self.prefix   = ""         # prefix to filename or holding information
                                   # about sparse files

        self.offset   = 0          # the tar header starts here
        self.offset_data = 0       # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.
           Raises ValueError if one of the mandatory numeric fields
           (mode, uid, gid, size, mtime, chksum) is empty or not an
           octal number.
        """
        def octal(field):
            # Numeric fields hold octal digits followed by NUL and/or
            # space. Cut at the first NUL so that int() never gets to
            # see it; an empty field still raises ValueError.
            return int(field.split(NUL, 1)[0], 8)

        tarinfo = cls()
        tarinfo.name   = nts(buf[0:100])
        tarinfo.mode   = octal(buf[100:108])
        tarinfo.uid    = octal(buf[108:116])
        tarinfo.gid    = octal(buf[116:124])
        tarinfo.size   = octal(buf[124:136])
        tarinfo.mtime  = octal(buf[136:148])
        tarinfo.chksum = octal(buf[148:156])
        tarinfo.type   = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname  = nts(buf[265:297])
        tarinfo.gname  = nts(buf[297:329])
        try:
            tarinfo.devmajor = octal(buf[329:337])
            tarinfo.devminor = octal(buf[337:345])
        except ValueError:
            # The device fields are optional: fall back to 0 for both.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.
           The block is also stored in self.buf.
        """
        name = self.name

        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (name, 100),
                ("%07o" % (self.mode & 0o7777), 8),
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                ("%011o" % self.size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),        # chksum placeholder, see below
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # Truncate or NUL-pad every value to its field size.
            l = len(value)
            parts.append(value[:fieldsize] + (fieldsize - l) * NUL)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # chksum field: six octal digits, a NUL, and the last of the
        # placeholder spaces.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member is a directory."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member is a symbolic link."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member is a (hard) link."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member is a character device."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member is a block device."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member is a fifo."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member is a GNU sparse file."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member is a character device, block
           device or fifo.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
745
746class TarFile(object):
747 """The TarFile Class provides an interface to tar archives.
748 """
749
750 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
751
752 dereference = False # If true, add content of linked file to the
753 # tar file, else the link.
754
755 ignore_zeros = False # If true, skips empty or invalid blocks and
756 # continues processing.
757
758 errorlevel = 0 # If 0, fatal errors only appear in debug
759 # messages (if debug >= 0). If > 0, errors
760 # are passed to the caller as exceptions.
761
762 posix = True # If True, generates POSIX.1-1990-compliant
763 # archives (no GNU extensions!)
764
765 fileobject = ExFileObject
766
767 def __init__(self, name=None, mode="r", fileobj=None):
768 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
769 read from an existing archive, 'a' to append data to an existing
770 file or 'w' to create a new file overwriting an existing one. `mode'
771 defaults to 'r'.
772 If `fileobj' is given, it is used for reading or writing data. If it
773 can be determined, `mode' is overridden by `fileobj's mode.
774 `fileobj' is not closed, when TarFile is closed.
775 """
776 self.name = name
777
778 if len(mode) > 1 or mode not in "raw":
779 raise ValueError, "mode must be 'r', 'a' or 'w'"
780 self._mode = mode
781 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
782
783 if not fileobj:
784 fileobj = file(self.name, self.mode)
785 self._extfileobj = False
786 else:
787 if self.name is None and hasattr(fileobj, "name"):
788 self.name = fileobj.name
789 if hasattr(fileobj, "mode"):
790 self.mode = fileobj.mode
791 self._extfileobj = True
792 self.fileobj = fileobj
793
794 # Init datastructures
795 self.closed = False
796 self.members = [] # list of members as TarInfo objects
797 self.membernames = [] # names of members
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000798 self._loaded = False # flag if all members have been read
799 self.offset = 0L # current position in the archive file
800 self.inodes = {} # dictionary caching the inodes of
801 # archive members already added
802
803 if self._mode == "r":
804 self.firstmember = None
805 self.firstmember = self.next()
806
807 if self._mode == "a":
808 # Move to the end of the archive,
809 # before the first empty block.
810 self.firstmember = None
811 while True:
812 try:
813 tarinfo = self.next()
814 except ReadError:
815 self.fileobj.seek(0)
816 break
817 if tarinfo is None:
818 self.fileobj.seek(- BLOCKSIZE, 1)
819 break
820
821 if self._mode in "aw":
822 self._loaded = True
823
824 #--------------------------------------------------------------------------
825 # Below are the classmethods which act as alternate constructors to the
826 # TarFile class. The open() method is the only one that is needed for
827 # public use; it is the "super"-constructor and is able to select an
828 # adequate "sub"-constructor for a particular compression using the mapping
829 # from OPEN_METH.
830 #
831 # This concept allows one to subclass TarFile without losing the comfort of
832 # the super-constructor. A sub-constructor is registered and made available
833 # by adding it to the mapping in OPEN_METH.
834
835 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
836 """Open a tar archive for reading, writing or appending. Return
837 an appropriate TarFile class.
838
839 mode:
840 'r' open for reading with transparent compression
841 'r:' open for reading exclusively uncompressed
842 'r:gz' open for reading with gzip compression
843 'r:bz2' open for reading with bzip2 compression
844 'a' or 'a:' open for appending
845 'w' or 'w:' open for writing without compression
846 'w:gz' open for writing with gzip compression
847 'w:bz2' open for writing with bzip2 compression
848 'r|' open an uncompressed stream of tar blocks for reading
849 'r|gz' open a gzip compressed stream of tar blocks
850 'r|bz2' open a bzip2 compressed stream of tar blocks
851 'w|' open an uncompressed stream for writing
852 'w|gz' open a gzip compressed stream for writing
853 'w|bz2' open a bzip2 compressed stream for writing
854 """
855
856 if not name and not fileobj:
857 raise ValueError, "nothing to open"
858
859 if ":" in mode:
860 filemode, comptype = mode.split(":", 1)
861 filemode = filemode or "r"
862 comptype = comptype or "tar"
863
864 # Select the *open() function according to
865 # given compression.
866 if comptype in cls.OPEN_METH:
867 func = getattr(cls, cls.OPEN_METH[comptype])
868 else:
869 raise CompressionError, "unknown compression type %r" % comptype
870 return func(name, filemode, fileobj)
871
872 elif "|" in mode:
873 filemode, comptype = mode.split("|", 1)
874 filemode = filemode or "r"
875 comptype = comptype or "tar"
876
877 if filemode not in "rw":
878 raise ValueError, "mode must be 'r' or 'w'"
879
880 t = cls(name, filemode,
881 _Stream(name, filemode, comptype, fileobj, bufsize))
882 t._extfileobj = False
883 return t
884
885 elif mode == "r":
886 # Find out which *open() is appropriate for opening the file.
887 for comptype in cls.OPEN_METH:
888 func = getattr(cls, cls.OPEN_METH[comptype])
889 try:
890 return func(name, "r", fileobj)
891 except (ReadError, CompressionError):
892 continue
893 raise ReadError, "file could not be opened successfully"
894
895 elif mode in "aw":
896 return cls.taropen(name, mode, fileobj)
897
898 raise ValueError, "undiscernible mode"
899
900 open = classmethod(open)
901
902 def taropen(cls, name, mode="r", fileobj=None):
903 """Open uncompressed tar archive name for reading or writing.
904 """
905 if len(mode) > 1 or mode not in "raw":
906 raise ValueError, "mode must be 'r', 'a' or 'w'"
907 return cls(name, mode, fileobj)
908
909 taropen = classmethod(taropen)
910
911 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
912 """Open gzip compressed tar archive name for reading or writing.
913 Appending is not allowed.
914 """
915 if len(mode) > 1 or mode not in "rw":
916 raise ValueError, "mode must be 'r' or 'w'"
917
918 try:
919 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000920 gzip.GzipFile
921 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000922 raise CompressionError, "gzip module is not available"
923
924 pre, ext = os.path.splitext(name)
925 pre = os.path.basename(pre)
926 if ext == ".tgz":
927 ext = ".tar"
928 if ext == ".gz":
929 ext = ""
930 tarname = pre + ext
931
932 if fileobj is None:
933 fileobj = file(name, mode + "b")
934
935 if mode != "r":
936 name = tarname
937
938 try:
939 t = cls.taropen(tarname, mode,
940 gzip.GzipFile(name, mode, compresslevel, fileobj)
941 )
942 except IOError:
943 raise ReadError, "not a gzip file"
944 t._extfileobj = False
945 return t
946
947 gzopen = classmethod(gzopen)
948
949 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
950 """Open bzip2 compressed tar archive name for reading or writing.
951 Appending is not allowed.
952 """
953 if len(mode) > 1 or mode not in "rw":
954 raise ValueError, "mode must be 'r' or 'w'."
955
956 try:
957 import bz2
958 except ImportError:
959 raise CompressionError, "bz2 module is not available"
960
961 pre, ext = os.path.splitext(name)
962 pre = os.path.basename(pre)
963 if ext == ".tbz2":
964 ext = ".tar"
965 if ext == ".bz2":
966 ext = ""
967 tarname = pre + ext
968
969 if fileobj is not None:
970 raise ValueError, "no support for external file objects"
971
972 try:
973 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
974 except IOError:
975 raise ReadError, "not a bzip2 file"
976 t._extfileobj = False
977 return t
978
979 bz2open = classmethod(bz2open)
980
981 # All *open() methods are registered here.
    # Maps a compression type, as used in the mode string of open(), to the
    # name of the classmethod that opens archives of that type. open()
    # resolves the name with getattr() on the class.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
987
988 #--------------------------------------------------------------------------
989 # The public methods which TarFile provides:
990
991 def close(self):
992 """Close the TarFile. In write-mode, two finishing zero blocks are
993 appended to the archive.
994 """
995 if self.closed:
996 return
997
998 if self._mode in "aw":
999 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1000 self.offset += (BLOCKSIZE * 2)
1001 # fill up the end with zero-blocks
1002 # (like option -b20 for tar does)
1003 blocks, remainder = divmod(self.offset, RECORDSIZE)
1004 if remainder > 0:
1005 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1006
1007 if not self._extfileobj:
1008 self.fileobj.close()
1009 self.closed = True
1010
1011 def getmember(self, name):
1012 """Return a TarInfo object for member `name'. If `name' can not be
1013 found in the archive, KeyError is raised. If a member occurs more
1014 than once in the archive, its last occurence is assumed to be the
1015 most up-to-date version.
1016 """
1017 self._check()
1018 if name not in self.membernames and not self._loaded:
1019 self._load()
1020 if name not in self.membernames:
1021 raise KeyError, "filename %r not found" % name
1022 return self._getmember(name)
1023
1024 def getmembers(self):
1025 """Return the members of the archive as a list of TarInfo objects. The
1026 list has the same order as the members in the archive.
1027 """
1028 self._check()
1029 if not self._loaded: # if we want to obtain a list of
1030 self._load() # all members, we first have to
1031 # scan the whole archive.
1032 return self.members
1033
1034 def getnames(self):
1035 """Return the members of the archive as a list of their names. It has
1036 the same order as the list returned by getmembers().
1037 """
1038 self._check()
1039 if not self._loaded:
1040 self._load()
1041 return self.membernames
1042
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for the file `name' or for the open file
    object `fileobj' (examined with os.fstat on its file descriptor).

    `arcname', if given, is the name stored in the archive instead of
    `name'. The returned object may be modified before it is passed to
    addfile(). None is returned when the file is not a regular file,
    directory, fifo, symbolic link, character device or block device.
    """
    self._check("aw")

    # An open file object takes precedence: its real name replaces `name'.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: normalize it, drop a drive prefix and turn
    # absolute paths into relative ones.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    arcname = arcname.lstrip("/")

    # fstat for file objects; otherwise lstat when symlinks are stored as
    # links (and the platform provides it), else stat.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    linkname = ""
    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if inode in self.inodes and not self.dereference:
            # Same inode as an already archived file: store a hardlink.
            typeflag = LNKTYPE
            linkname = self.inodes[inode]
        else:
            typeflag = REGTYPE
            # Remember the inode only if it is valid
            # (on win32 it is always 0).
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        typeflag = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        typeflag = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        typeflag = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        typeflag = CHRTYPE
    elif stat.S_ISBLK(stmd):
        typeflag = BLKTYPE
    else:
        return None

    # Copy everything the stat result offers into a fresh TarInfo.
    tarinfo = TarInfo()
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = typeflag
    tarinfo.linkname = linkname

    # Symbolic owner names are optional: the lookup modules may be
    # unavailable and the ids may be unknown to the system.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if typeflag in (CHRTYPE, BLKTYPE) \
       and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1138
def list(self, verbose=True):
    """Print a table of contents to sys.stdout.

    With `verbose' set to False only the member names are printed, one
    per line. Otherwise each line resembles `ls -l' output: mode, owner
    and group, size (or major,minor for devices), modification time,
    name and, for links, the link target.
    """
    self._check()

    for tarinfo in self:
        # The columns of one line; they are joined with single spaces.
        fields = []
        if verbose:
            fields.append(filemode(tarinfo.mode))
            fields.append("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                     tarinfo.gname or tarinfo.gid))
            if tarinfo.ischr() or tarinfo.isblk():
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                fields.append("%10s" % device)
            else:
                fields.append("%10d" % tarinfo.size)
            fields.append("%d-%02d-%02d %02d:%02d:%02d"
                          % time.localtime(tarinfo.mtime)[:6])

        fields.append(tarinfo.name)

        if verbose:
            if tarinfo.issym():
                fields.extend(["->", tarinfo.linkname])
            if tarinfo.islnk():
                fields.extend(["link to", tarinfo.linkname])

        sys.stdout.write(" ".join(fields) + "\n")
1167
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive.

    `name' may be any type of file (directory, fifo, symbolic link,
    etc.). `arcname', if given, is the name used inside the archive
    instead of `name'. Directories are added recursively unless
    `recursive' is False. The archive file itself and files of an
    unsupported type are skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: the current working directory. No member is written
    # for "." itself; only its entries are added (when recursing).
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # file() rather than open(): the module rebinds `open'.
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source even if writing the member fails.
            f.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        # These members consist of a header only.
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))
1220
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

    If `fileobj' is given, tarinfo.size bytes are read from it and
    stored as the member's data. TarInfo objects can be created with
    gettarinfo(). On Windows platforms `fileobj' should always be opened
    in mode 'rb' so that the file size is not misjudged.

    ValueError is raised if the member is larger than MAXSIZE_MEMBER or,
    in posix mode, if its name or link name does not fit the header.
    Note that `tarinfo' is modified in place (normalized and possibly
    shortened names).
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError("file is too large (>8GB)")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # GNU extension: the full link name goes into a member of its
        # own, the header keeps a shortened copy.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last "/" that lies within the first
            # LENGTH_PREFIX + 1 characters: what precedes it becomes the
            # prefix field, the rest stays in the name field.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            head = head[:head.rfind("/") + 1]
            tail = tarinfo.name[len(head):]
            head = head[:-1]

            if not head or len(tail) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = tail
            tarinfo.prefix = head
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad it with NULs up to
    # the next block boundary.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        datablocks = tarinfo.size // BLOCKSIZE
        rest = tarinfo.size % BLOCKSIZE
        if rest > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - rest))
            datablocks += 1
        self.offset += datablocks * BLOCKSIZE

    self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001283
def extract(self, member, path=""):
    """Extract a member from the archive to the current working
    directory, using its full name, or below `path' if that is given.

    `member' may be a filename or a TarInfo object. File information is
    restored as accurately as possible. Depending on self.errorlevel,
    errors are either re-raised or only reported through _dbg():
    EnvironmentError is re-raised for levels above 0, ExtractError for
    levels above 1.
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1316
def extractfile(self, member):
    """Extract a member from the archive as a file object.

    `member' may be a filename or a TarInfo object. For a regular file,
    or a member of an unknown type, a file-like object is returned. For
    a link the file object of the link's target is returned. For all
    other members (directory, device, ...) the result is None.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Members of an unknown type are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # No data is associated with the member (directory, chrdev,
        # blkdev, etc.): there is nothing to read.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object. The target
    # is searched among the members that precede the link.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1355
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object `tarinfo' to a physical file called
    `targetpath'.

    Missing parent directories are created first. The member is then
    created by the make*() method matching its type, and owner, mode and
    modification time are applied (mode and time are skipped for
    symbolic links).
    """
    # Build the destination pathname: drop one trailing slash and let
    # os.path.normpath produce the platform specific form.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories by extracting a synthetic directory
    # member that borrows owner and time from the real one.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        dirinfo = TarInfo()
        dirinfo.name = parent
        dirinfo.type = DIRTYPE
        dirinfo.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # 0777
        dirinfo.mtime = tarinfo.mtime
        dirinfo.uid = tarinfo.uid
        dirinfo.gid = tarinfo.gid
        dirinfo.uname = tarinfo.uname
        dirinfo.gname = tarinfo.gname
        try:
            self._extract_member(dirinfo, dirinfo.name)
        except:
            # Best effort only: a real problem will show up again when
            # the member itself is created below.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Select the creation method for the member's type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1408
1409 #--------------------------------------------------------------------------
1410 # Below are the different file methods. They are called via
1411 # _extract_member() when extract() is called. They can be replaced in a
1412 # subclass to implement other functionality.
1413
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

    It is not an error if targetpath exists already; any other failure
    of os.mkdir is passed on to the caller.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1422
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
    member `tarinfo'.

    Both the member's file object and the target file are closed even
    when opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() rather than open(): the module rebinds `open'.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1431
def makeunknown(self, tarinfo, targetpath):
    """Make a file at targetpath from a TarInfo object with an unknown
    type: it is extracted like a regular file and a debug message
    reports the type that was encountered.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1439
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

    ExtractError is raised on platforms without os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1447
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

    ExtractError is raised on platforms that lack os.mknod or
    os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # The member's mode carries the permission bits; the device kind
    # has to be or-ed in for mknod.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1462
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
    (platform limitation), we try to make a copy of the referenced file
    instead of a link.

    IOError is raised when neither the link nor a copy can be made.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # _link_target is set by extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        # Raised when os.symlink/os.link (or _link_target) is missing;
        # fall through to the copy strategy below.
        pass

    if tarinfo.issym():
        # A symlink target is relative to the directory of the link.
        linkpath = os.path.join(os.path.dirname(tarinfo.name), linkpath)
        linkpath = normpath(linkpath)

    try:
        # First choice: extract the referenced member once more.
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        # Second choice: copy the referenced file from the filesystem.
        try:
            shutil.copy2(os.path.normpath(linkpath), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1489
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

    Nothing is done unless the pwd module is available and the process
    runs with effective uid 0. Group and user are resolved by name
    first, then by numeric id, and finally fall back to the ids of the
    current process. ExtractError is raised if the owner cannot be
    changed.
    """
    # We have to be root to do so.
    if not pwd or not hasattr(os, "geteuid") or os.geteuid() != 0:
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1517
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Nothing is done on platforms without os.chmod. ExtractError is
    raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001526
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

    Access time and modification time are both set to tarinfo.mtime.
    Nothing is done on platforms without os.utime, nor for directories
    on win32. ExtractError is raised if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1540
1541 #--------------------------------------------------------------------------
1542
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    ReadError is raised if the very first block of the archive cannot
    be interpreted as a tar header. Members whose typeflag has a handler
    registered in TYPE_METH are processed by that handler, and its
    result is returned.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Report the unusable block and try the following one.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid, this does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and store a directory as
    # a regular file whose name ends with a slash. The test must look at
    # the last character ([-1:]); the former [:-1] slice compared the
    # whole name minus its last character and so practically never hit.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self._record_member(tarinfo)
    return tarinfo
1609
1610 #--------------------------------------------------------------------------
1611 # Below are some methods which are called for special typeflags in the
1612 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1613 # are registered in TYPE_METH below. You can register your own methods
1614 # with this mapping.
1615 # A registered method is called with a TarInfo object as only argument.
1616 #
1617 # During its execution the method MUST perform the following tasks:
1618 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1619 # if there is data to follow.
1620 # 2. set self.offset to the position where the next member's header will
1621 # begin.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001622 # 3. call self._record_member() if the tarinfo object is supposed to
1623 # appear as a member of the TarFile object.
1624 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001625
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname or longlink member.

    The data blocks following `tarinfo' contain the full name (or link
    name) of the member described by the next header. That member is
    fetched with next(), updated and returned. None is returned if the
    archive ends before the next header, which callers of next() treat
    as the end of the archive.
    """
    # Collect the data blocks that carry the long (link) name.
    chunks = []
    count = tarinfo.size
    while count > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header
    member = self.next()
    if member is None:
        # Truncated archive: there is no member to attach the name to.
        return None

    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
        # next() has already recorded the member under the shortened
        # name found in its own header; keep the name list in step so
        # that the member can be looked up by its full name.
        if self.members and self.members[-1] is member:
            self.membernames[-1] = member.name
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001650
def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

    The sparse structs (pairs of 12-digit octal offset and size fields)
    are translated into a _ringbuffer of _hole and _data sections, which
    is stored as tarinfo.sparse. tarinfo.size is replaced by the
    original file size taken from the header. The member is recorded
    and returned.
    """
    buf = tarinfo.tobuf()
    sections = _ringbuffer()
    lastpos = 0     # end of the last section in the expanded file
    realpos = 0     # position of the next data section in the stored data

    # There are 4 possible sparse structs in the first header,
    # starting at byte 386 and 24 bytes apart.
    for pos in range(386, 386 + 4 * 24, 24):
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            break
        if offset > lastpos:
            sections.append(_hole(lastpos, offset - lastpos))
        sections.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes

    isextended = ord(buf[482])
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given, there are extra headers to
    # process; each holds up to 21 structs and its own flag at byte 504.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        for pos in range(0, 21 * 24, 24):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sections.append(_hole(lastpos, offset - lastpos))
            sections.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
        isextended = ord(buf[504])

    # A trailing hole fills the file up to its original size.
    if lastpos < origsize:
        sections.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sections

    # The stored size decides where the next header starts; only then
    # is it replaced by the size of the expanded file.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    self._record_member(tarinfo)
    return tarinfo
1708
# The type mapping for the next() method. Each key is a typeflag (a
# single character string); each value is the method that next() calls
# when it encounters a header with that typeflag.
TYPE_METH = {}
TYPE_METH[GNUTYPE_LONGNAME] = proc_gnulong
TYPE_METH[GNUTYPE_LONGLINK] = proc_gnulong
TYPE_METH[GNUTYPE_SPARSE] = proc_sparse
1717
1718 #--------------------------------------------------------------------------
1719 # Little helper methods:
1720
def _block(self, count):
    """Round up a byte count to a multiple of BLOCKSIZE and return it,
    e.g. _block(834) => 1024.
    """
    blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partly filled block still occupies a whole block.
        blocks += 1
    return blocks * BLOCKSIZE
1729
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name, searching from bottom to top.

    If `tarinfo' is given, only the members that precede it are
    searched. None is returned when no member matches.
    """
    if tarinfo is None:
        idx = len(self.members)
    else:
        idx = self.members.index(tarinfo)

    while idx > 0:
        idx -= 1
        if name == self.membernames[idx]:
            return self.members[idx]
    return None
1742
def _record_member(self, tarinfo):
    """Record a tarinfo object in the internal data structures: the
    object goes into self.members, its current name into
    self.membernames at the same position.
    """
    member_name = tarinfo.name
    self.members.append(tarinfo)
    self.membernames.append(member_name)
1748
def _load(self):
    """Read through the entire archive file and look for readable
    members, then mark the archive as completely loaded.
    """
    # next() records every member it finds; None marks the end.
    while self.next() is not None:
        pass
    self._loaded = True
1758
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
    corresponds to TarFile's mode.

    `mode' is a string of acceptable mode characters, or None to skip
    the mode test. IOError is raised if either check fails.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
1767
def __iter__(self):
    """Provide an iterator object.

    Once the archive has been read completely, the member list is
    iterated directly; before that a TarIter is returned, which reads
    the members on demand.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1775
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

    It consists of an extended tar header, with the length of the
    longname (including its terminating NUL) as size, followed by data
    blocks which contain the longname as a null terminated string.
    `type' is the typeflag to store in the header.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks, padded with NULs to a full block
    self.fileobj.write(payload)
    blocks = header.size // BLOCKSIZE
    rest = header.size % BLOCKSIZE
    if rest > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - rest))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1800
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

    The message is written, followed by a newline, only if `level' does
    not exceed self.debug.
    """
    if level > self.debug:
        return
    sys.stderr.write("%s\n" % (msg,))
1806# class TarFile
1807
class TarIter:
    """Iterator over the members of a TarFile that has not been read
    completely yet.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the TarFile `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator object, i.e. this object itself.
        """
        return self

    def next(self):
        """Return the next member using TarFile's next() method.
           When there are no more members, flag the TarFile as _loaded
           and stop the iteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1832
1833# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole: a range of `size' bytes that
    starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The range is half open: its end offset is not part of it.
        start = self.offset
        return start <= offset < start + self.size
1842
class _data(_section):
    """Represent a data section in a sparse file. In addition to the
    range inherited from _section it stores `realpos', which
    proc_sparse() sets to the running total of the sizes of the data
    sections that precede it.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos
1849
class _hole(_section):
    """Represent a hole section in a sparse file. It adds nothing to
    _section; the class only serves to tell holes from data.
    """
1854
class _ringbuffer(list):
    """List of sections that remembers where the last successful lookup
    ended, so that the next search starts there instead of at the front.
    This is faster than a plain list when lookups are mostly sequential.
    """
    def __init__(self):
        # Index of the item that satisfied the most recent find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching circularly from the last
        hit, for which `offset in item' is true. Return None if there is
        no such item or the buffer is empty.
        """
        if not self:
            # Nothing to search; indexing below would raise IndexError.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # Wrapped around without a hit: End of File
                return None
        self.idx = idx
        return item
1875
1876#---------------------------------------------
1877# zipfile compatible TarFile class
1878#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open the archive `file' in `mode', plain or gzip compressed
           depending on `compression' (TAR_PLAIN or TAR_GZIPPED).
           ValueError is raised for any other compression constant.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names of a ZipInfo.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents using TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None; present for ZipFile
           compatibility only.
        """
        return

    def getinfo(self, name):
        """Return the member called `name'."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called `name'."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive, optionally under the
           name `arcname'. `compress_type' is ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member described by `zinfo',
           whose ZipInfo style attributes are copied to their TarInfo
           counterparts first.
        """
        import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
1924
1925#--------------------
1926# exported functions
1927#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again; a TarError raised while
       doing so means that it cannot be handled.
    """
    try:
        # `open' is the module level alias of TarFile.open here.
        archive = open(name)
        archive.close()
    except TarError:
        return False
    else:
        return True
1938
# Module level shortcut: tarfile.open() is TarFile.open(). From here on
# the name `open' in this module no longer refers to the builtin.
open = TarFile.open