blob: 41257f1eb9993a687e31c4bd1804b2a3972a7a99 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
71NUL = "\0" # the null character
72BLOCKSIZE = 512 # length of processing blocks
73RECORDSIZE = BLOCKSIZE * 20 # length of records
74MAGIC = "ustar" # magic tar string
75VERSION = "00" # version number
76
77LENGTH_NAME = 100 # maximum length of a filename
78LENGTH_LINK = 100 # maximum length of a linkname
79LENGTH_PREFIX = 155 # maximum length of the prefix field
80MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
81
82REGTYPE = "0" # regular file
83AREGTYPE = "\0" # regular file
84LNKTYPE = "1" # link (inside tarfile)
85SYMTYPE = "2" # symbolic link
86CHRTYPE = "3" # character special device
87BLKTYPE = "4" # block special device
88DIRTYPE = "5" # directory
89FIFOTYPE = "6" # fifo special device
90CONTTYPE = "7" # contiguous file
91
92GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
93GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
94GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
99SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
101 CONTTYPE, CHRTYPE, BLKTYPE,
102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
103 GNUTYPE_SPARSE)
104
105REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
111S_IFLNK = 0120000 # symbolic link
112S_IFREG = 0100000 # regular file
113S_IFBLK = 0060000 # block device
114S_IFDIR = 0040000 # directory
115S_IFCHR = 0020000 # character device
116S_IFIFO = 0010000 # fifo
117
118TSUID = 04000 # set UID on execution
119TSGID = 02000 # set GID on execution
120TSVTX = 01000 # reserved
121
122TUREAD = 0400 # read by owner
123TUWRITE = 0200 # write by owner
124TUEXEC = 0100 # execute/search by owner
125TGREAD = 0040 # read by group
126TGWRITE = 0020 # write by group
127TGEXEC = 0010 # execute/search by group
128TOREAD = 0004 # read by other
129TOWRITE = 0002 # write by other
130TOEXEC = 0001 # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

       Every NUL character at the end of `s' is removed; NULs that
       occur elsewhere in the buffer are left untouched.
    """
    trimmed = s.rstrip(NUL)
    return trimmed
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Return the checksum for a member's header.

       buf is the 512 byte long string buffer which holds the header.
       The checksum is the plain sum of all byte values, with the 8 byte
       chksum field itself (buf[148:156]) counted as if it held spaces.
    """
    blanks = 8 * ord(" ")                # the chksum field, taken as blanks
    before = sum(map(ord, buf[:148]))    # all bytes in front of the field
    after = sum(map(ord, buf[156:]))     # all bytes behind the field
    return blanks + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

       If length is None, the entire remaining content of src is copied
       (this is delegated to shutil.copyfileobj). Otherwise the data is
       moved in pieces of at most 16 KiB and IOError is raised as soon as
       src delivers fewer bytes than were asked for.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    chunks_left, tail = divmod(length, BUFSIZE)
    # First all the full-sized pieces, then one final piece for the rest.
    while chunks_left > 0 or tail != 0:
        if chunks_left > 0:
            want = BUFSIZE
            chunks_left -= 1
        else:
            want, tail = tail, 0
        data = src.read(want)
        if len(data) < want:
            raise IOError("end of file reached")
        dst.write(data)
    return
175
# Lookup table for filemode(). Every inner tuple describes one character
# of the resulting string as a flat sequence of (bitmask, character)
# pairs. The pairs are tried from left to right and the first bitmask
# that is completely contained in the mode wins, so a combined mask
# (e.g. TUEXEC|TSUID) has to be listed in front of the masks it is
# made of - otherwise the plain "x" would always shadow "s" and "t".
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       mode is the integer mode of the file. The result has one
       character per entry of filemode_table (ten in total); a position
       for which none of the bitmasks matches is shown as "-".
    """
    chars = []
    for entry in filemode_table:
        # entry is (bitmask, char, bitmask, char, ...): walk the pairs.
        for i in range(0, len(entry), 2):
            bits = entry[i]
            if mode & bits == bits:
                chars.append(entry[i + 1])
                break
        else:
            chars.append("-")
    return "".join(chars)
210
# normpath() always yields paths that use "/" as separator, which is
# what member names inside an archive use.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Base exception; every other exception of this module derives
       from it.
    """

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
259class _Stream:
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
266
267 _Stream is intended to be used only internally.
268 """
269
270 def __init__(self, name, mode, type, fileobj, bufsize):
271 """Construct a _Stream object.
272 """
273 self._extfileobj = True
274 if fileobj is None:
275 fileobj = _LowLevelFile(name, mode)
276 self._extfileobj = False
277
278 self.name = name or ""
279 self.mode = mode
280 self.type = type
281 self.fileobj = fileobj
282 self.bufsize = bufsize
283 self.buf = ""
284 self.pos = 0L
285 self.closed = False
286
287 if type == "gz":
288 try:
289 import zlib
290 except ImportError:
291 raise CompressionError, "zlib module is not available"
292 self.zlib = zlib
293 self.crc = zlib.crc32("")
294 if mode == "r":
295 self._init_read_gz()
296 else:
297 self._init_write_gz()
298
299 if type == "bz2":
300 try:
301 import bz2
302 except ImportError:
303 raise CompressionError, "bz2 module is not available"
304 if mode == "r":
305 self.dbuf = ""
306 self.cmp = bz2.BZ2Decompressor()
307 else:
308 self.cmp = bz2.BZ2Compressor()
309
310 def __del__(self):
311 if not self.closed:
312 self.close()
313
314 def _init_write_gz(self):
315 """Initialize for writing with gzip compression.
316 """
317 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
318 -self.zlib.MAX_WBITS,
319 self.zlib.DEF_MEM_LEVEL,
320 0)
321 timestamp = struct.pack("<L", long(time.time()))
322 self.__write("\037\213\010\010%s\002\377" % timestamp)
323 if self.name.endswith(".gz"):
324 self.name = self.name[:-3]
325 self.__write(self.name + NUL)
326
327 def write(self, s):
328 """Write string s to the stream.
329 """
330 if self.type == "gz":
331 self.crc = self.zlib.crc32(s, self.crc)
332 self.pos += len(s)
333 if self.type != "tar":
334 s = self.cmp.compress(s)
335 self.__write(s)
336
337 def __write(self, s):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
340 """
341 self.buf += s
342 while len(self.buf) > self.bufsize:
343 self.fileobj.write(self.buf[:self.bufsize])
344 self.buf = self.buf[self.bufsize:]
345
346 def close(self):
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
349 """
350 if self.closed:
351 return
352
353 if self.mode == "w" and self.buf:
354 if self.type != "tar":
355 self.buf += self.cmp.flush()
Andrew M. Kuchling6e4f7a82004-01-02 15:44:29 +0000356 self.__write("") # Write remaining blocks to output
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000357 self.fileobj.write(self.buf)
358 self.buf = ""
359 if self.type == "gz":
360 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000361 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000362
363 if not self._extfileobj:
364 self.fileobj.close()
365
366 self.closed = True
367
368 def _init_read_gz(self):
369 """Initialize for reading a gzip compressed fileobj.
370 """
371 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
372 self.dbuf = ""
373
374 # taken from gzip.GzipFile with some alterations
375 if self.__read(2) != "\037\213":
376 raise ReadError, "not a gzip file"
377 if self.__read(1) != "\010":
378 raise CompressionError, "unsupported compression method"
379
380 flag = ord(self.__read(1))
381 self.__read(6)
382
383 if flag & 4:
384 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
385 self.read(xlen)
386 if flag & 8:
387 while True:
388 s = self.__read(1)
389 if not s or s == NUL:
390 break
391 if flag & 16:
392 while True:
393 s = self.__read(1)
394 if not s or s == NUL:
395 break
396 if flag & 2:
397 self.__read(2)
398
399 def tell(self):
400 """Return the stream's file pointer position.
401 """
402 return self.pos
403
404 def seek(self, pos=0):
405 """Set the stream's file pointer to pos. Negative seeking
406 is forbidden.
407 """
408 if pos - self.pos >= 0:
409 blocks, remainder = divmod(pos - self.pos, self.bufsize)
410 for i in xrange(blocks):
411 self.read(self.bufsize)
412 self.read(remainder)
413 else:
414 raise StreamError, "seeking backwards is not allowed"
415 return self.pos
416
417 def read(self, size=None):
418 """Return the next size number of bytes from the stream.
419 If size is not defined, return all bytes of the stream
420 up to EOF.
421 """
422 if size is None:
423 t = []
424 while True:
425 buf = self._read(self.bufsize)
426 if not buf:
427 break
428 t.append(buf)
429 buf = "".join(t)
430 else:
431 buf = self._read(size)
432 self.pos += len(buf)
433 return buf
434
435 def _read(self, size):
436 """Return size bytes from the stream.
437 """
438 if self.type == "tar":
439 return self.__read(size)
440
441 c = len(self.dbuf)
442 t = [self.dbuf]
443 while c < size:
444 buf = self.__read(self.bufsize)
445 if not buf:
446 break
447 buf = self.cmp.decompress(buf)
448 t.append(buf)
449 c += len(buf)
450 t = "".join(t)
451 self.dbuf = t[size:]
452 return t[:size]
453
454 def __read(self, size):
455 """Return size bytes from stream. If internal buffer is empty,
456 read another block from the stream.
457 """
458 c = len(self.buf)
459 t = [self.buf]
460 while c < size:
461 buf = self.fileobj.read(self.bufsize)
462 if not buf:
463 break
464 t.append(buf)
465 c += len(buf)
466 t = "".join(t)
467 self.buf = t[size:]
468 return t[:size]
469# class _Stream
470
471#------------------------
472# Extraction file object
473#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.

       The object does not own a file of its own: all data is read
       from the file object of the TarFile it was created from, and
       every read seeks that file object to the needed position first.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up reading of the member described by `tarinfo' from
           the file object of `tarfile'.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the member's data
        self.size = tarinfo.size
        self.pos = 0L                       # position inside the member
        self.linebuffer = ""                # read-ahead data of readline()
        # read() is bound per instance, depending on the member type.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.

           NOTE: because of the two leading underscores the name is
           mangled to _ExFileObject__read; a subclass has to define
           that name in order to replace this method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Data is fetched with read() in pieces of at most 100 bytes
           and kept in self.linebuffer until a newline shows up. If a
           newline was found, carriage returns directly in front of it
           are dropped and the line is returned with a single "\n" at
           its end. If no newline was found, the content of the
           buffer is returned as it is.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            nl = min(nl, size)
        else:
            # No complete line buffered yet: read on until a newline
            # turns up, the data ends or size is used up.
            size -= len(self.linebuffer)
            while nl < 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                if size <= 0:
                    break
                nl = self.linebuffer.find("\n")
            if nl == -1:
                # Still no newline: hand out everything that is buffered.
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Return at most size bytes, or everything up to the end of
           the member if size is None. ValueError is raised if the
           object has been closed.
        """
        if self.closed:
            raise ValueError, "file is closed"
        # The underlying file object may have been moved in the
        # meantime, so it is positioned anew for every read.
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           The data is put together section by section until size
           bytes are collected or no further section is found.
           ValueError is raised if the object has been closed.
        """
        if self.closed:
            raise ValueError, "file is closed"

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.

           Return at most size bytes from the section that contains
           the current position, or "" if self.sparse.find() does not
           return a section for it.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # Never read beyond the end of the current section.
        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Section with data in the archive: realpos is used as its
            # position relative to the start of the member's data.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other kind of section (presumably a hole - the section
            # classes are defined outside this part of the file) is
            # delivered as NUL characters.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (pos counts from the start), 1 (from the
           current position) or 2 (from the end). The resulting
           position is limited to the range 0 .. self.size; for any
           other value of whence the position is left as it is.
           The buffer of readline() is thrown away in any case.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.

           Only the object itself is marked as closed, the file object
           of the archive is left alone.
        """
        self.closed = True
#class ExFileObject
616
617#------------------
618# Exported Classes
619#------------------
620class TarInfo(object):
621 """Informational class which holds the details about an
622 archive member given by a tar header block.
623 TarInfo objects are returned by TarFile.getmember(),
624 TarFile.getmembers() and TarFile.gettarinfo() and are
625 usually created internally.
626 """
627
628 def __init__(self, name=""):
629 """Construct a TarInfo object. name is the optional name
630 of the member.
631 """
632
633 self.name = name # member name (dirnames must end with '/')
634 self.mode = 0666 # file permissions
635 self.uid = 0 # user id
636 self.gid = 0 # group id
637 self.size = 0 # file size
638 self.mtime = 0 # modification time
639 self.chksum = 0 # header checksum
640 self.type = REGTYPE # member type
641 self.linkname = "" # link name
642 self.uname = "user" # user name
643 self.gname = "group" # group name
644 self.devmajor = 0 #-
645 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
646 self.prefix = "" # prefix to filename or holding information
647 # about sparse files
648
649 self.offset = 0 # the tar header starts here
650 self.offset_data = 0 # the file's data starts here
651
652 def __repr__(self):
653 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
654
655 def frombuf(cls, buf):
656 """Construct a TarInfo object from a 512 byte string buffer.
657 """
658 tarinfo = cls()
659 tarinfo.name = nts(buf[0:100])
660 tarinfo.mode = int(buf[100:108], 8)
661 tarinfo.uid = int(buf[108:116],8)
662 tarinfo.gid = int(buf[116:124],8)
663 tarinfo.size = long(buf[124:136], 8)
664 tarinfo.mtime = long(buf[136:148], 8)
665 tarinfo.chksum = int(buf[148:156], 8)
666 tarinfo.type = buf[156:157]
667 tarinfo.linkname = nts(buf[157:257])
668 tarinfo.uname = nts(buf[265:297])
669 tarinfo.gname = nts(buf[297:329])
670 try:
671 tarinfo.devmajor = int(buf[329:337], 8)
672 tarinfo.devminor = int(buf[337:345], 8)
673 except ValueError:
674 tarinfo.devmajor = tarinfo.devmajor = 0
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000675 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000676
677 # The prefix field is used for filenames > 100 in
678 # the POSIX standard.
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000679 # name = prefix + '/' + name
680 if tarinfo.type != GNUTYPE_SPARSE:
681 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000682
683 # Directory names should have a '/' at the end.
684 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
685 tarinfo.name += "/"
686 return tarinfo
687
688 frombuf = classmethod(frombuf)
689
690 def tobuf(self):
691 """Return a tar header block as a 512 byte string.
692 """
693 name = self.name
694
695 # The following code was contributed by Detlef Lannert.
696 parts = []
697 for value, fieldsize in (
698 (name, 100),
699 ("%07o" % (self.mode & 07777), 8),
700 ("%07o" % self.uid, 8),
701 ("%07o" % self.gid, 8),
702 ("%011o" % self.size, 12),
703 ("%011o" % self.mtime, 12),
704 (" ", 8),
705 (self.type, 1),
706 (self.linkname, 100),
707 (MAGIC, 6),
708 (VERSION, 2),
709 (self.uname, 32),
710 (self.gname, 32),
711 ("%07o" % self.devmajor, 8),
712 ("%07o" % self.devminor, 8),
713 (self.prefix, 155)
714 ):
715 l = len(value)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000716 parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000717
718 buf = "".join(parts)
719 chksum = calc_chksum(buf)
720 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
721 buf += (BLOCKSIZE - len(buf)) * NUL
722 self.buf = buf
723 return buf
724
725 def isreg(self):
726 return self.type in REGULAR_TYPES
727 def isfile(self):
728 return self.isreg()
729 def isdir(self):
730 return self.type == DIRTYPE
731 def issym(self):
732 return self.type == SYMTYPE
733 def islnk(self):
734 return self.type == LNKTYPE
735 def ischr(self):
736 return self.type == CHRTYPE
737 def isblk(self):
738 return self.type == BLKTYPE
739 def isfifo(self):
740 return self.type == FIFOTYPE
741 def issparse(self):
742 return self.type == GNUTYPE_SPARSE
743 def isdev(self):
744 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
745# class TarInfo
746
747class TarFile(object):
748 """The TarFile Class provides an interface to tar archives.
749 """
750
751 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
752
753 dereference = False # If true, add content of linked file to the
754 # tar file, else the link.
755
756 ignore_zeros = False # If true, skips empty or invalid blocks and
757 # continues processing.
758
759 errorlevel = 0 # If 0, fatal errors only appear in debug
760 # messages (if debug >= 0). If > 0, errors
761 # are passed to the caller as exceptions.
762
763 posix = True # If True, generates POSIX.1-1990-compliant
764 # archives (no GNU extensions!)
765
766 fileobject = ExFileObject
767
768 def __init__(self, name=None, mode="r", fileobj=None):
769 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
770 read from an existing archive, 'a' to append data to an existing
771 file or 'w' to create a new file overwriting an existing one. `mode'
772 defaults to 'r'.
773 If `fileobj' is given, it is used for reading or writing data. If it
774 can be determined, `mode' is overridden by `fileobj's mode.
775 `fileobj' is not closed, when TarFile is closed.
776 """
777 self.name = name
778
779 if len(mode) > 1 or mode not in "raw":
780 raise ValueError, "mode must be 'r', 'a' or 'w'"
781 self._mode = mode
782 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
783
784 if not fileobj:
785 fileobj = file(self.name, self.mode)
786 self._extfileobj = False
787 else:
788 if self.name is None and hasattr(fileobj, "name"):
789 self.name = fileobj.name
790 if hasattr(fileobj, "mode"):
791 self.mode = fileobj.mode
792 self._extfileobj = True
793 self.fileobj = fileobj
794
795 # Init datastructures
796 self.closed = False
797 self.members = [] # list of members as TarInfo objects
798 self.membernames = [] # names of members
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000799 self._loaded = False # flag if all members have been read
800 self.offset = 0L # current position in the archive file
801 self.inodes = {} # dictionary caching the inodes of
802 # archive members already added
803
804 if self._mode == "r":
805 self.firstmember = None
806 self.firstmember = self.next()
807
808 if self._mode == "a":
809 # Move to the end of the archive,
810 # before the first empty block.
811 self.firstmember = None
812 while True:
813 try:
814 tarinfo = self.next()
815 except ReadError:
816 self.fileobj.seek(0)
817 break
818 if tarinfo is None:
819 self.fileobj.seek(- BLOCKSIZE, 1)
820 break
821
822 if self._mode in "aw":
823 self._loaded = True
824
825 #--------------------------------------------------------------------------
826 # Below are the classmethods which act as alternate constructors to the
827 # TarFile class. The open() method is the only one that is needed for
828 # public use; it is the "super"-constructor and is able to select an
829 # adequate "sub"-constructor for a particular compression using the mapping
830 # from OPEN_METH.
831 #
832 # This concept allows one to subclass TarFile without losing the comfort of
833 # the super-constructor. A sub-constructor is registered and made available
834 # by adding it to the mapping in OPEN_METH.
835
836 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
837 """Open a tar archive for reading, writing or appending. Return
838 an appropriate TarFile class.
839
840 mode:
841 'r' open for reading with transparent compression
842 'r:' open for reading exclusively uncompressed
843 'r:gz' open for reading with gzip compression
844 'r:bz2' open for reading with bzip2 compression
845 'a' or 'a:' open for appending
846 'w' or 'w:' open for writing without compression
847 'w:gz' open for writing with gzip compression
848 'w:bz2' open for writing with bzip2 compression
849 'r|' open an uncompressed stream of tar blocks for reading
850 'r|gz' open a gzip compressed stream of tar blocks
851 'r|bz2' open a bzip2 compressed stream of tar blocks
852 'w|' open an uncompressed stream for writing
853 'w|gz' open a gzip compressed stream for writing
854 'w|bz2' open a bzip2 compressed stream for writing
855 """
856
857 if not name and not fileobj:
858 raise ValueError, "nothing to open"
859
860 if ":" in mode:
861 filemode, comptype = mode.split(":", 1)
862 filemode = filemode or "r"
863 comptype = comptype or "tar"
864
865 # Select the *open() function according to
866 # given compression.
867 if comptype in cls.OPEN_METH:
868 func = getattr(cls, cls.OPEN_METH[comptype])
869 else:
870 raise CompressionError, "unknown compression type %r" % comptype
871 return func(name, filemode, fileobj)
872
873 elif "|" in mode:
874 filemode, comptype = mode.split("|", 1)
875 filemode = filemode or "r"
876 comptype = comptype or "tar"
877
878 if filemode not in "rw":
879 raise ValueError, "mode must be 'r' or 'w'"
880
881 t = cls(name, filemode,
882 _Stream(name, filemode, comptype, fileobj, bufsize))
883 t._extfileobj = False
884 return t
885
886 elif mode == "r":
887 # Find out which *open() is appropriate for opening the file.
888 for comptype in cls.OPEN_METH:
889 func = getattr(cls, cls.OPEN_METH[comptype])
890 try:
891 return func(name, "r", fileobj)
892 except (ReadError, CompressionError):
893 continue
894 raise ReadError, "file could not be opened successfully"
895
896 elif mode in "aw":
897 return cls.taropen(name, mode, fileobj)
898
899 raise ValueError, "undiscernible mode"
900
901 open = classmethod(open)
902
903 def taropen(cls, name, mode="r", fileobj=None):
904 """Open uncompressed tar archive name for reading or writing.
905 """
906 if len(mode) > 1 or mode not in "raw":
907 raise ValueError, "mode must be 'r', 'a' or 'w'"
908 return cls(name, mode, fileobj)
909
910 taropen = classmethod(taropen)
911
912 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
913 """Open gzip compressed tar archive name for reading or writing.
914 Appending is not allowed.
915 """
916 if len(mode) > 1 or mode not in "rw":
917 raise ValueError, "mode must be 'r' or 'w'"
918
919 try:
920 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000921 gzip.GzipFile
922 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000923 raise CompressionError, "gzip module is not available"
924
925 pre, ext = os.path.splitext(name)
926 pre = os.path.basename(pre)
927 if ext == ".tgz":
928 ext = ".tar"
929 if ext == ".gz":
930 ext = ""
931 tarname = pre + ext
932
933 if fileobj is None:
934 fileobj = file(name, mode + "b")
935
936 if mode != "r":
937 name = tarname
938
939 try:
940 t = cls.taropen(tarname, mode,
941 gzip.GzipFile(name, mode, compresslevel, fileobj)
942 )
943 except IOError:
944 raise ReadError, "not a gzip file"
945 t._extfileobj = False
946 return t
947
948 gzopen = classmethod(gzopen)
949
950 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
951 """Open bzip2 compressed tar archive name for reading or writing.
952 Appending is not allowed.
953 """
954 if len(mode) > 1 or mode not in "rw":
955 raise ValueError, "mode must be 'r' or 'w'."
956
957 try:
958 import bz2
959 except ImportError:
960 raise CompressionError, "bz2 module is not available"
961
962 pre, ext = os.path.splitext(name)
963 pre = os.path.basename(pre)
964 if ext == ".tbz2":
965 ext = ".tar"
966 if ext == ".bz2":
967 ext = ""
968 tarname = pre + ext
969
970 if fileobj is not None:
971 raise ValueError, "no support for external file objects"
972
973 try:
974 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
975 except IOError:
976 raise ReadError, "not a bzip2 file"
977 t._extfileobj = False
978 return t
979
980 bz2open = classmethod(bz2open)
981
982 # All *open() methods are registered here.
983 OPEN_METH = {
984 "tar": "taropen", # uncompressed tar
985 "gz": "gzopen", # gzip compressed tar
986 "bz2": "bz2open" # bzip2 compressed tar
987 }
988
989 #--------------------------------------------------------------------------
990 # The public methods which TarFile provides:
991
992 def close(self):
993 """Close the TarFile. In write-mode, two finishing zero blocks are
994 appended to the archive.
995 """
996 if self.closed:
997 return
998
999 if self._mode in "aw":
1000 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1001 self.offset += (BLOCKSIZE * 2)
1002 # fill up the end with zero-blocks
1003 # (like option -b20 for tar does)
1004 blocks, remainder = divmod(self.offset, RECORDSIZE)
1005 if remainder > 0:
1006 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1007
1008 if not self._extfileobj:
1009 self.fileobj.close()
1010 self.closed = True
1011
1012 def getmember(self, name):
1013 """Return a TarInfo object for member `name'. If `name' can not be
1014 found in the archive, KeyError is raised. If a member occurs more
1015 than once in the archive, its last occurence is assumed to be the
1016 most up-to-date version.
1017 """
1018 self._check()
1019 if name not in self.membernames and not self._loaded:
1020 self._load()
1021 if name not in self.membernames:
1022 raise KeyError, "filename %r not found" % name
1023 return self._getmember(name)
1024
1025 def getmembers(self):
1026 """Return the members of the archive as a list of TarInfo objects. The
1027 list has the same order as the members in the archive.
1028 """
1029 self._check()
1030 if not self._loaded: # if we want to obtain a list of
1031 self._load() # all members, we first have to
1032 # scan the whole archive.
1033 return self.members
1034
1035 def getnames(self):
1036 """Return the members of the archive as a list of their names. It has
1037 the same order as the list returned by getmembers().
1038 """
1039 self._check()
1040 if not self._loaded:
1041 self._load()
1042 return self.membernames
1043
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing a file on disk.

    The file is identified either by its path `name' or by the open
    file object `fileobj'; in the latter case fileobj.name replaces
    `name' and os.fstat() on its descriptor supplies the metadata.
    `arcname', if given, is the name to store in the archive instead
    of `name'. The returned object may be modified before it is handed
    to addfile(). None is returned for file types other than regular
    file, directory, fifo, symbolic link, character and block device.
    """
    self._check("aw")

    # A file object always dictates the path that is examined.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalize it, drop a drive
    # specification and strip leading slashes so that
    # absolute paths become relative ones.
    if arcname is None:
        arcname = name
    drive, arcname = os.path.splitdrive(normpath(arcname))
    arcname = arcname.lstrip("/")

    tarinfo = TarInfo()

    # Pick the stat flavour: fstat for file objects, lstat when
    # available and symlinks must not be followed, stat otherwise.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)

    mode = st.st_mode
    linkname = ""

    if stat.S_ISREG(mode):
        key = (st.st_ino, st.st_dev)
        if not self.dereference and key in self.inodes:
            # Same inode as a file archived earlier: store a hardlink
            # that points to the name used back then.
            typeflag = LNKTYPE
            linkname = self.inodes[key]
        else:
            typeflag = REGTYPE
            # Only remember usable inode numbers
            # (on win32 the number is always 0).
            if key[0]:
                self.inodes[key] = arcname
    elif stat.S_ISDIR(mode):
        typeflag = DIRTYPE
        if not arcname.endswith("/"):
            arcname += "/"
    elif stat.S_ISFIFO(mode):
        typeflag = FIFOTYPE
    elif stat.S_ISLNK(mode):
        typeflag = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(mode):
        typeflag = CHRTYPE
    elif stat.S_ISBLK(mode):
        typeflag = BLKTYPE
    else:
        # No tar representation for this kind of file.
        return None

    # Copy everything the stat result offers into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    tarinfo.size = st.st_size
    tarinfo.mtime = st.st_mtime
    tarinfo.type = typeflag
    tarinfo.linkname = linkname

    # Symbolic owner names are filled in only if the pwd/grp modules
    # are usable and know the numeric id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if typeflag in (CHRTYPE, BLKTYPE) \
       and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(st.st_rdev)
        tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1139
1140 def list(self, verbose=True):
1141 """Print a table of contents to sys.stdout. If `verbose' is False, only
1142 the names of the members are printed. If it is True, an `ls -l'-like
1143 output is produced.
1144 """
1145 self._check()
1146
1147 for tarinfo in self:
1148 if verbose:
1149 print filemode(tarinfo.mode),
1150 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1151 tarinfo.gname or tarinfo.gid),
1152 if tarinfo.ischr() or tarinfo.isblk():
1153 print "%10s" % ("%d,%d" \
1154 % (tarinfo.devmajor, tarinfo.devminor)),
1155 else:
1156 print "%10d" % tarinfo.size,
1157 print "%d-%02d-%02d %02d:%02d:%02d" \
1158 % time.localtime(tarinfo.mtime)[:6],
1159
1160 print tarinfo.name,
1161
1162 if verbose:
1163 if tarinfo.issym():
1164 print "->", tarinfo.linkname,
1165 if tarinfo.islnk():
1166 print "link to", tarinfo.linkname,
1167 print
1168
1169 def add(self, name, arcname=None, recursive=True):
1170 """Add the file `name' to the archive. `name' may be any type of file
1171 (directory, fifo, symbolic link, etc.). If given, `arcname'
1172 specifies an alternative name for the file in the archive.
1173 Directories are added recursively by default. This can be avoided by
1174 setting `recursive' to False.
1175 """
1176 self._check("aw")
1177
1178 if arcname is None:
1179 arcname = name
1180
1181 # Skip if somebody tries to archive the archive...
1182 if self.name is not None \
1183 and os.path.abspath(name) == os.path.abspath(self.name):
1184 self._dbg(2, "tarfile: Skipped %r" % name)
1185 return
1186
1187 # Special case: The user wants to add the current
1188 # working directory.
1189 if name == ".":
1190 if recursive:
1191 if arcname == ".":
1192 arcname = ""
1193 for f in os.listdir("."):
1194 self.add(f, os.path.join(arcname, f))
1195 return
1196
1197 self._dbg(1, name)
1198
1199 # Create a TarInfo object from the file.
1200 tarinfo = self.gettarinfo(name, arcname)
1201
1202 if tarinfo is None:
1203 self._dbg(1, "tarfile: Unsupported type %r" % name)
1204 return
1205
1206 # Append the tar header and data to the archive.
1207 if tarinfo.isreg():
1208 f = file(name, "rb")
1209 self.addfile(tarinfo, f)
1210 f.close()
1211
1212 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1213 tarinfo.size = 0L
1214 self.addfile(tarinfo)
1215
1216 if tarinfo.isdir():
1217 self.addfile(tarinfo)
1218 if recursive:
1219 for f in os.listdir(name):
1220 self.add(os.path.join(name, f), os.path.join(arcname, f))
1221
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

    If `fileobj' is given, tarinfo.size bytes are copied from it into
    the archive after the header. TarInfo objects can be created with
    gettarinfo(). On Windows platforms `fileobj' should always be
    opened with mode 'rb' to avoid irritation about the file size.

    `tarinfo' is modified in place: name and linkname are normalized
    and, if too long for the header fields, shortened. ValueError is
    raised for members larger than MAXSIZE_MEMBER and, in posix mode,
    for names or link names that do not fit.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError("file is too large (>8GB)")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # GNU mode: the full link name travels in an extra member,
        # the header itself keeps a truncated copy.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last slash found within the first
            # LENGTH_PREFIX + 1 characters; the part before that slash
            # becomes the prefix, the part after it the name.
            cut = tarinfo.name.rfind("/", 0, LENGTH_PREFIX + 1)
            head = tarinfo.name[:cut + 1]   # "" if there is no such slash
            tail = tarinfo.name[len(head):]
            head = head[:-1]

            if not head or len(tail) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = tail
            tarinfo.prefix = head
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad it with NULs
    # up to the next block boundary.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        padding = -tarinfo.size % BLOCKSIZE
        if padding:
            self.fileobj.write(NUL * padding)
        self.offset += tarinfo.size + padding

    self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001284
1285 def extract(self, member, path=""):
1286 """Extract a member from the archive to the current working directory,
1287 using its full name. Its file information is extracted as accurately
1288 as possible. `member' may be a filename or a TarInfo object. You can
1289 specify a different directory using `path'.
1290 """
1291 self._check("r")
1292
1293 if isinstance(member, TarInfo):
1294 tarinfo = member
1295 else:
1296 tarinfo = self.getmember(member)
1297
1298 try:
1299 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1300 except EnvironmentError, e:
1301 if self.errorlevel > 0:
1302 raise
1303 else:
1304 if e.filename is None:
1305 self._dbg(1, "tarfile: %s" % e.strerror)
1306 else:
1307 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1308 except ExtractError, e:
1309 if self.errorlevel > 1:
1310 raise
1311 else:
1312 self._dbg(1, "tarfile: %s" % e)
1313
def extractfile(self, member):
    """Return a file-like object for reading a member's data.

    `member' is either a TarInfo object or a member name. Regular
    files and members of an unknown type yield a self.fileobject
    instance. For a hard or symbolic link the file object of the
    link's target is returned; this raises StreamError when the
    archive is read from a _Stream. None is returned for all other
    member types (directory, device, fifo, ...).
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Members of an unknown type are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone
            # tries to extract a (sym)link as a file object from a
            # non-seekable stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object. The
        # target is searched among the members preceding the link.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # No data is associated with this member.
    return None
1352
1353 def _extract_member(self, tarinfo, targetpath):
1354 """Extract the TarInfo object tarinfo to a physical
1355 file called targetpath.
1356 """
1357 # Fetch the TarInfo object for the given name
1358 # and build the destination pathname, replacing
1359 # forward slashes to platform specific separators.
1360 if targetpath[-1:] == "/":
1361 targetpath = targetpath[:-1]
1362 targetpath = os.path.normpath(targetpath)
1363
1364 # Create all upper directories.
1365 upperdirs = os.path.dirname(targetpath)
1366 if upperdirs and not os.path.exists(upperdirs):
1367 ti = TarInfo()
1368 ti.name = upperdirs
1369 ti.type = DIRTYPE
1370 ti.mode = 0777
1371 ti.mtime = tarinfo.mtime
1372 ti.uid = tarinfo.uid
1373 ti.gid = tarinfo.gid
1374 ti.uname = tarinfo.uname
1375 ti.gname = tarinfo.gname
1376 try:
1377 self._extract_member(ti, ti.name)
1378 except:
1379 pass
1380
1381 if tarinfo.islnk() or tarinfo.issym():
1382 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1383 else:
1384 self._dbg(1, tarinfo.name)
1385
1386 if tarinfo.isreg():
1387 self.makefile(tarinfo, targetpath)
1388 elif tarinfo.isdir():
1389 self.makedir(tarinfo, targetpath)
1390 elif tarinfo.isfifo():
1391 self.makefifo(tarinfo, targetpath)
1392 elif tarinfo.ischr() or tarinfo.isblk():
1393 self.makedev(tarinfo, targetpath)
1394 elif tarinfo.islnk() or tarinfo.issym():
1395 self.makelink(tarinfo, targetpath)
1396 elif tarinfo.type not in SUPPORTED_TYPES:
1397 self.makeunknown(tarinfo, targetpath)
1398 else:
1399 self.makefile(tarinfo, targetpath)
1400
1401 self.chown(tarinfo, targetpath)
1402 if not tarinfo.issym():
1403 self.chmod(tarinfo, targetpath)
1404 self.utime(tarinfo, targetpath)
1405
1406 #--------------------------------------------------------------------------
1407 # Below are the different file methods. They are called via
1408 # _extract_member() when extract() is called. They can be replaced in a
1409 # subclass to implement other functionality.
1410
1411 def makedir(self, tarinfo, targetpath):
1412 """Make a directory called targetpath.
1413 """
1414 try:
1415 os.mkdir(targetpath)
1416 except EnvironmentError, e:
1417 if e.errno != errno.EEXIST:
1418 raise
1419
def makefile(self, tarinfo, targetpath):
    """Write the data of member `tarinfo' to a new file `targetpath'.

    The member's data is obtained through extractfile() and copied
    with copyfileobj(). Both file objects are closed even if opening
    the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() is used on purpose: the module-level name open is
        # rebound to TarFile.open at the end of this module.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1428
def makeunknown(self, tarinfo, targetpath):
    """Extract a member whose type is unknown.

    The member is written to `targetpath' exactly like a regular
    file, afterwards a debug message mentions the unknown type.
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, " \
             "extracted as regular file." % tarinfo.type
    self._dbg(1, notice)
1436
def makefifo(self, tarinfo, targetpath):
    """Create the fifo `targetpath'.

    ExtractError is raised if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1444
def makedev(self, tarinfo, targetpath):
    """Create the character or block device node `targetpath'.

    ExtractError is raised if the os module lacks mknod() or
    makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode bits with the matching device type flag.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1459
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'.

    If os.symlink()/os.link() is missing (AttributeError), a copy of
    the referenced file is made instead: first by extracting the
    referenced member from the archive, and if that fails by copying
    the file from the filesystem. IOError is raised if that copy
    fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            os.link(linkpath, targetpath)
        return
    except AttributeError:
        # The platform cannot create this kind of link; fall back
        # to a copy below.
        pass

    if tarinfo.issym():
        # A symlink target is relative to the link's own directory.
        base = os.path.dirname(tarinfo.name)
        linkpath = normpath(os.path.join(base, linkpath))

    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        try:
            shutil.copy2(os.path.normpath(linkpath), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1485
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

    Nothing happens unless the pwd module is available and the
    effective user id is 0. User and group are resolved by name
    first, then by numeric id, and finally default to the ids of
    the current process. ExtractError is raised if changing the
    owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            # os.chown() is not called at all on os2emx.
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1513
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in `tarinfo' to `targetpath'.

    Nothing happens if the os module has no chmod(). ExtractError
    is raised if the call fails.
    """
    if not hasattr(os, "chmod"):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001522
def utime(self, tarinfo, targetpath):
    """Apply the modification time stored in `tarinfo' to `targetpath'.

    Access and modification time are both set to tarinfo.mtime.
    Nothing happens if the os module has no utime(), or for
    directories on win32. ExtractError is raised if the call fails.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if not hasattr(os, "utime") \
       or (sys.platform == "win32" and tarinfo.isdir()):
        return
    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1536
1537 #--------------------------------------------------------------------------
1538
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    ReadError is raised if the very first block of the archive
    cannot be interpreted as a tar header (unless ignore_zeros is
    set, in which case unreadable blocks are skipped).
    """
    self._check("ra")
    if self.firstmember is not None:
        # Hand out the member that was read ahead earlier.
        first = self.firstmember
        self.firstmember = None
        return first

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the block and try the following one.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid, this does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and store a directory
    # as a regular file whose name ends with a slash. The slice must
    # be [-1:] (the last character); [:-1] would compare everything
    # but the last character and practically never match.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self._record_member(tarinfo)
    return tarinfo
1605
1606 #--------------------------------------------------------------------------
1607 # Below are some methods which are called for special typeflags in the
1608 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1609 # are registered in TYPE_METH below. You can register your own methods
1610 # with this mapping.
1611 # A registered method is called with a TarInfo object as only argument.
1612 #
1613 # During its execution the method MUST perform the following tasks:
1614 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1615 # if there is data to follow.
1616 # 2. set self.offset to the position where the next member's header will
1617 # begin.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001618 # 3. call self._record_member() if the tarinfo object is supposed to
1619 # appear as a member of the TarFile object.
1620 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001621
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname or longlink member.

    `tarinfo' is the header of the longname/longlink pseudo member.
    Its data blocks contain the long name, which is applied to the
    member that follows in the archive. That following member is
    returned with its offset set to the offset of `tarinfo'.
    ReadError is raised if no member follows.
    """
    # Collect the data blocks that carry the long name.
    blocks = []
    count = tarinfo.size
    while count > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(blocks)

    # Fetch the next header; it describes the member the
    # long name belongs to.
    member = self.next()
    if member is None:
        # A truncated archive would otherwise surface as an
        # AttributeError on None below.
        raise ReadError("missing header after GNU longname/longlink block")

    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
        # next() has already recorded the member under the truncated
        # name from its header; keep the name list in sync so that
        # getmember() and getnames() work with the long name.
        if self.members and self.members[-1] is member:
            self.membernames[-1] = member.name
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001646
1647 def proc_sparse(self, tarinfo):
1648 """Analyze a GNU sparse header plus extra headers.
1649 """
1650 buf = tarinfo.tobuf()
1651 sp = _ringbuffer()
1652 pos = 386
1653 lastpos = 0L
1654 realpos = 0L
1655 # There are 4 possible sparse structs in the
1656 # first header.
1657 for i in xrange(4):
1658 try:
1659 offset = int(buf[pos:pos + 12], 8)
1660 numbytes = int(buf[pos + 12:pos + 24], 8)
1661 except ValueError:
1662 break
1663 if offset > lastpos:
1664 sp.append(_hole(lastpos, offset - lastpos))
1665 sp.append(_data(offset, numbytes, realpos))
1666 realpos += numbytes
1667 lastpos = offset + numbytes
1668 pos += 24
1669
1670 isextended = ord(buf[482])
1671 origsize = int(buf[483:495], 8)
1672
1673 # If the isextended flag is given,
1674 # there are extra headers to process.
1675 while isextended == 1:
1676 buf = self.fileobj.read(BLOCKSIZE)
1677 self.offset += BLOCKSIZE
1678 pos = 0
1679 for i in xrange(21):
1680 try:
1681 offset = int(buf[pos:pos + 12], 8)
1682 numbytes = int(buf[pos + 12:pos + 24], 8)
1683 except ValueError:
1684 break
1685 if offset > lastpos:
1686 sp.append(_hole(lastpos, offset - lastpos))
1687 sp.append(_data(offset, numbytes, realpos))
1688 realpos += numbytes
1689 lastpos = offset + numbytes
1690 pos += 24
1691 isextended = ord(buf[504])
1692
1693 if lastpos < origsize:
1694 sp.append(_hole(lastpos, origsize - lastpos))
1695
1696 tarinfo.sparse = sp
1697
1698 tarinfo.offset_data = self.offset
1699 self.offset += self._block(tarinfo.size)
1700 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001701
1702 self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001703 return tarinfo
1704
# Dispatch table of the next() method. Each key is a typeflag (a
# single character string); the value is the method that next()
# calls with the TarInfo object when it reads a header carrying
# that typeflag.
TYPE_METH = {
    GNUTYPE_SPARSE:   proc_sparse,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_LONGNAME: proc_gnulong
}
1713
1714 #--------------------------------------------------------------------------
1715 # Little helper methods:
1716
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
    e.g. _block(834) => 1024.
    """
    # Negating a floor division yields the ceiling of the quotient.
    blocks = -(-count // BLOCKSIZE)
    return blocks * BLOCKSIZE
1725
def _getmember(self, name, tarinfo=None):
    """Return the last member called `name', or None if there is none.

    The member list is searched backwards. If `tarinfo' is given,
    only the members stored before it are considered.
    """
    if tarinfo is None:
        idx = len(self.members)
    else:
        idx = self.members.index(tarinfo)

    while idx > 0:
        idx -= 1
        if name == self.membernames[idx]:
            return self.members[idx]
    return None
1738
def _record_member(self, tarinfo):
    """Register `tarinfo' as a member of this TarFile.

    The object is appended to self.members and its current name
    to self.membernames, so both lists stay index-aligned.
    """
    members, names = self.members, self.membernames
    members.append(tarinfo)
    names.append(tarinfo.name)
1744
def _load(self):
    """Read the archive to its end, then mark the TarFile as
    completely loaded.
    """
    # next() does the bookkeeping; only the end marker matters here.
    while self.next() is not None:
        pass
    self._loaded = True
1754
def _check(self, mode=None):
    """Verify that the TarFile may be used for an operation.

    IOError is raised if the TarFile is closed, or if `mode' is
    given and the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
1763
def __iter__(self):
    """Return an iterator over the members of the archive.

    A fully loaded archive is iterated via its member list,
    otherwise a TarIter that reads members on demand is returned.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1771
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

    `name' is the long name or link name, `type' the typeflag to
    store (GNUTYPE_LONGNAME or GNUTYPE_LONGLINK). The member consists
    of an extended tar header, with the length of the NUL terminated
    name as size, followed by data blocks which contain the name as
    a NUL terminated string. self.offset is advanced past the header
    and the data blocks.
    """
    # The stored name is NUL terminated and the terminator counts
    # towards the member's size.
    name += NUL

    tarinfo = TarInfo()
    tarinfo.name = "././@LongLink"
    tarinfo.type = type
    tarinfo.mode = 0
    tarinfo.size = len(name)

    # write extended header
    self.fileobj.write(tarinfo.tobuf())
    # The header block has to be accounted for as well; close()
    # derives the final record padding from self.offset.
    self.offset += BLOCKSIZE

    # write name blocks
    self.fileobj.write(name)
    blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
    if remainder > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1793
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed
    the TarFile's debug setting.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1799# class TarFile
1800
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator that reads its items from `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member as delivered by the TarFile's next()
           method. Once that yields a false value, the TarFile is
           flagged as _loaded and StopIteration is raised.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1825
1826# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size' bytes
       that starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True if `offset' lies in [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
1835
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it carries `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1842
class _hole(_section):
    """A section of a sparse file that is a hole. It adds
       nothing to _section and serves as a marker type only.
    """
1847
class _ringbuffer(list):
    """List of sections with a search that resumes where the
       previous search succeeded, wrapping around at the end.
       This increases performance over scanning a regular list
       from the start when lookups are mostly sequential.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains `offset', starting
           the search at the position of the previous hit. Return
           None if no item contains it (or the buffer is empty).
        """
        if not self:
            # Nothing to search; indexing below would raise IndexError.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File: a full round trip without a hit.
                return None
        self.idx = idx
        return item
1868
1869#---------------------------------------------
1870# zipfile compatible TarFile class
1871#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED

class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # TAR_PLAIN opens an uncompressed archive, TAR_GZIPPED a
        # gzip compressed one.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode.startswith("r"):
            # Give every member the attribute names that
            # zipfile's ZipInfo objects use.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        return [m.name for m in self.infolist()]

    def infolist(self):
        # Only members of a regular file type are reported.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        # No check is performed; None is always returned.
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted for compatibility and ignored.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the ZipInfo style attributes into the ones
        # addfile() reads.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
#class TarFileCompat
1917
1918#--------------------
1919# exported functions
1920#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened as a tar archive
       by this module, False if that raises TarError.
    """
    # open is the module-level alias of TarFile.open defined below,
    # not the builtin.
    try:
        open(name).close()
    except TarError:
        return False
    return True

# From here on the module-level name open refers to TarFile.open.
open = TarFile.open