blob: 152b23c252c9f82b48581aa7b076e6c1f835fd84 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
71NUL = "\0" # the null character
72BLOCKSIZE = 512 # length of processing blocks
73RECORDSIZE = BLOCKSIZE * 20 # length of records
74MAGIC = "ustar" # magic tar string
75VERSION = "00" # version number
76
77LENGTH_NAME = 100 # maximum length of a filename
78LENGTH_LINK = 100 # maximum length of a linkname
79LENGTH_PREFIX = 155 # maximum length of the prefix field
80MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
81
82REGTYPE = "0" # regular file
83AREGTYPE = "\0" # regular file
84LNKTYPE = "1" # link (inside tarfile)
85SYMTYPE = "2" # symbolic link
86CHRTYPE = "3" # character special device
87BLKTYPE = "4" # block special device
88DIRTYPE = "5" # directory
89FIFOTYPE = "6" # fifo special device
90CONTTYPE = "7" # contiguous file
91
92GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
93GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
94GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
99SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
101 CONTTYPE, CHRTYPE, BLKTYPE,
102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
103 GNUTYPE_SPARSE)
104
105REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
111S_IFLNK = 0120000 # symbolic link
112S_IFREG = 0100000 # regular file
113S_IFBLK = 0060000 # block device
114S_IFDIR = 0040000 # directory
115S_IFCHR = 0020000 # character device
116S_IFIFO = 0010000 # fifo
117
118TSUID = 04000 # set UID on execution
119TSGID = 02000 # set GID on execution
120TSVTX = 01000 # reserved
121
122TUREAD = 0400 # read by owner
123TUWRITE = 0200 # write by owner
124TUEXEC = 0100 # execute/search by owner
125TGREAD = 0040 # read by group
126TGWRITE = 0020 # write by group
127TGEXEC = 0010 # execute/search by group
128TOREAD = 0004 # read by other
129TOWRITE = 0002 # write by other
130TOEXEC = 0001 # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Return the part of string buffer s that precedes its first NUL.

    A buffer that contains no NUL character is returned as a whole.
    """
    end = s.find(NUL)
    if end < 0:
        return s
    return s[:end]
139
def calc_chksum(buf):
    """Return the checksum of the member header held in buf.

    The checksum is the plain sum of the byte values of the header, where
    the eight bytes of the chksum field itself (offsets 148 to 155) are
    counted as if they were blanks.
    buf is a 512 byte long string buffer which holds the header.
    """
    # Eight blanks stand in for the chksum field: 8 * ord(" ") == 256.
    before = sum([ord(ch) for ch in buf[:148]])
    after = sum([ord(ch) for ch in buf[156:]])
    return 256 + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length set to None everything up to EOF is copied (this is
    delegated to shutil.copyfileobj). IOError is raised if src runs out
    of data before length bytes have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)

    # First the full-sized chunks ...
    while blocks > 0:
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
        blocks -= 1

    # ... then whatever is left over.
    if remainder != 0:
        chunk = src.read(remainder)
        if len(chunk) < remainder:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
175
# One entry per character of the mode string. Every entry is a flat
# sequence of (bitmask, character) pairs which are tried from left to
# right; the first mask that is completely set in the mode wins.
# Masks that include other masks of the same entry therefore have to
# come first, otherwise e.g. "s" (setuid + exec) could never be chosen
# because the plain exec bit would already have matched.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       For every position of the result the alternatives listed in
       filemode_table are tried in order; if none of them matches,
       the position is filled with "-".
    """
    chars = []
    for entry in filemode_table:
        for i in range(0, len(entry), 2):
            bits = entry[i]
            if mode & bits == bits:
                chars.append(entry[i + 1])
                break
        else:
            # no alternative of this entry is set in mode
            chars.append("-")
    return "".join(chars)
210
if os.sep == "/":
    # The native separator is already the one used inside archives.
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the native separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
259class _Stream:
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
266
267 _Stream is intended to be used only internally.
268 """
269
270 def __init__(self, name, mode, type, fileobj, bufsize):
271 """Construct a _Stream object.
272 """
273 self._extfileobj = True
274 if fileobj is None:
275 fileobj = _LowLevelFile(name, mode)
276 self._extfileobj = False
277
278 self.name = name or ""
279 self.mode = mode
280 self.type = type
281 self.fileobj = fileobj
282 self.bufsize = bufsize
283 self.buf = ""
284 self.pos = 0L
285 self.closed = False
286
287 if type == "gz":
288 try:
289 import zlib
290 except ImportError:
291 raise CompressionError, "zlib module is not available"
292 self.zlib = zlib
293 self.crc = zlib.crc32("")
294 if mode == "r":
295 self._init_read_gz()
296 else:
297 self._init_write_gz()
298
299 if type == "bz2":
300 try:
301 import bz2
302 except ImportError:
303 raise CompressionError, "bz2 module is not available"
304 if mode == "r":
305 self.dbuf = ""
306 self.cmp = bz2.BZ2Decompressor()
307 else:
308 self.cmp = bz2.BZ2Compressor()
309
310 def __del__(self):
311 if not self.closed:
312 self.close()
313
314 def _init_write_gz(self):
315 """Initialize for writing with gzip compression.
316 """
317 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
318 -self.zlib.MAX_WBITS,
319 self.zlib.DEF_MEM_LEVEL,
320 0)
321 timestamp = struct.pack("<L", long(time.time()))
322 self.__write("\037\213\010\010%s\002\377" % timestamp)
323 if self.name.endswith(".gz"):
324 self.name = self.name[:-3]
325 self.__write(self.name + NUL)
326
327 def write(self, s):
328 """Write string s to the stream.
329 """
330 if self.type == "gz":
331 self.crc = self.zlib.crc32(s, self.crc)
332 self.pos += len(s)
333 if self.type != "tar":
334 s = self.cmp.compress(s)
335 self.__write(s)
336
337 def __write(self, s):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
340 """
341 self.buf += s
342 while len(self.buf) > self.bufsize:
343 self.fileobj.write(self.buf[:self.bufsize])
344 self.buf = self.buf[self.bufsize:]
345
346 def close(self):
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
349 """
350 if self.closed:
351 return
352
353 if self.mode == "w" and self.buf:
354 if self.type != "tar":
355 self.buf += self.cmp.flush()
356 self.fileobj.write(self.buf)
357 self.buf = ""
358 if self.type == "gz":
359 self.fileobj.write(struct.pack("<l", self.crc))
360 self.fileobj.write(struct.pack("<L", self.pos))
361
362 if not self._extfileobj:
363 self.fileobj.close()
364
365 self.closed = True
366
367 def _init_read_gz(self):
368 """Initialize for reading a gzip compressed fileobj.
369 """
370 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
371 self.dbuf = ""
372
373 # taken from gzip.GzipFile with some alterations
374 if self.__read(2) != "\037\213":
375 raise ReadError, "not a gzip file"
376 if self.__read(1) != "\010":
377 raise CompressionError, "unsupported compression method"
378
379 flag = ord(self.__read(1))
380 self.__read(6)
381
382 if flag & 4:
383 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
384 self.read(xlen)
385 if flag & 8:
386 while True:
387 s = self.__read(1)
388 if not s or s == NUL:
389 break
390 if flag & 16:
391 while True:
392 s = self.__read(1)
393 if not s or s == NUL:
394 break
395 if flag & 2:
396 self.__read(2)
397
398 def tell(self):
399 """Return the stream's file pointer position.
400 """
401 return self.pos
402
403 def seek(self, pos=0):
404 """Set the stream's file pointer to pos. Negative seeking
405 is forbidden.
406 """
407 if pos - self.pos >= 0:
408 blocks, remainder = divmod(pos - self.pos, self.bufsize)
409 for i in xrange(blocks):
410 self.read(self.bufsize)
411 self.read(remainder)
412 else:
413 raise StreamError, "seeking backwards is not allowed"
414 return self.pos
415
416 def read(self, size=None):
417 """Return the next size number of bytes from the stream.
418 If size is not defined, return all bytes of the stream
419 up to EOF.
420 """
421 if size is None:
422 t = []
423 while True:
424 buf = self._read(self.bufsize)
425 if not buf:
426 break
427 t.append(buf)
428 buf = "".join(t)
429 else:
430 buf = self._read(size)
431 self.pos += len(buf)
432 return buf
433
434 def _read(self, size):
435 """Return size bytes from the stream.
436 """
437 if self.type == "tar":
438 return self.__read(size)
439
440 c = len(self.dbuf)
441 t = [self.dbuf]
442 while c < size:
443 buf = self.__read(self.bufsize)
444 if not buf:
445 break
446 buf = self.cmp.decompress(buf)
447 t.append(buf)
448 c += len(buf)
449 t = "".join(t)
450 self.dbuf = t[size:]
451 return t[:size]
452
453 def __read(self, size):
454 """Return size bytes from stream. If internal buffer is empty,
455 read another block from the stream.
456 """
457 c = len(self.buf)
458 t = [self.buf]
459 while c < size:
460 buf = self.fileobj.read(self.bufsize)
461 if not buf:
462 break
463 t.append(buf)
464 c += len(buf)
465 t = "".join(t)
466 self.buf = t[size:]
467 return t[:size]
468# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member tarinfo that takes its
           data from tarfile.fileobj.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the data in fileobj
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""
        # read() is bound per instance to the matching implementation.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Carriage returns directly in front of the newline are
           removed from the returned line.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            nl = min(nl, size)
        else:
            # No complete line buffered yet: read on in small portions
            # until a newline shows up, size is used up or EOF is hit.
            size -= len(self.linebuffer)
            while nl < 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                if size <= 0:
                    break
                nl = self.linebuffer.find("\n")
            if nl == -1:
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Return at most size bytes; if size is None or negative,
           return everything up to the end of the member.
           ValueError is raised if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        # A negative size must not reach min() below: it would move
        # self.pos backwards and make the underlying read() run past
        # the end of the member.
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           Return at most size bytes; if size is None or negative,
           return everything up to the end of the member.
           ValueError is raised if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.

           Return at most size bytes from the section that contains
           the current position, or "" if there is no such section.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data section: map the position to the place where the
            # data is actually stored.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is not stored: it reads as NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. whence is 0 (absolute),
           1 (relative to the current position) or 2 (relative to
           the end). The resulting position is clipped to the range
           from 0 to the size of the member.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
615
616#------------------
617# Exported Classes
618#------------------
619class TarInfo(object):
620 """Informational class which holds the details about an
621 archive member given by a tar header block.
622 TarInfo objects are returned by TarFile.getmember(),
623 TarFile.getmembers() and TarFile.gettarinfo() and are
624 usually created internally.
625 """
626
627 def __init__(self, name=""):
628 """Construct a TarInfo object. name is the optional name
629 of the member.
630 """
631
632 self.name = name # member name (dirnames must end with '/')
633 self.mode = 0666 # file permissions
634 self.uid = 0 # user id
635 self.gid = 0 # group id
636 self.size = 0 # file size
637 self.mtime = 0 # modification time
638 self.chksum = 0 # header checksum
639 self.type = REGTYPE # member type
640 self.linkname = "" # link name
641 self.uname = "user" # user name
642 self.gname = "group" # group name
643 self.devmajor = 0 #-
644 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
645 self.prefix = "" # prefix to filename or holding information
646 # about sparse files
647
648 self.offset = 0 # the tar header starts here
649 self.offset_data = 0 # the file's data starts here
650
651 def __repr__(self):
652 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
653
654 def frombuf(cls, buf):
655 """Construct a TarInfo object from a 512 byte string buffer.
656 """
657 tarinfo = cls()
658 tarinfo.name = nts(buf[0:100])
659 tarinfo.mode = int(buf[100:108], 8)
660 tarinfo.uid = int(buf[108:116],8)
661 tarinfo.gid = int(buf[116:124],8)
662 tarinfo.size = long(buf[124:136], 8)
663 tarinfo.mtime = long(buf[136:148], 8)
664 tarinfo.chksum = int(buf[148:156], 8)
665 tarinfo.type = buf[156:157]
666 tarinfo.linkname = nts(buf[157:257])
667 tarinfo.uname = nts(buf[265:297])
668 tarinfo.gname = nts(buf[297:329])
669 try:
670 tarinfo.devmajor = int(buf[329:337], 8)
671 tarinfo.devminor = int(buf[337:345], 8)
672 except ValueError:
673 tarinfo.devmajor = tarinfo.devmajor = 0
674
675 # The prefix field is used for filenames > 100 in
676 # the POSIX standard.
677 # name = prefix + "/" + name
678 prefix = buf[345:500]
679 while prefix and prefix[-1] == NUL:
680 prefix = prefix[:-1]
681 if len(prefix.split(NUL)) == 1:
682 tarinfo.prefix = prefix
683 tarinfo.name = normpath(os.path.join(tarinfo.prefix, tarinfo.name))
684 else:
685 tarinfo.prefix = buf[345:500]
686
687 # Directory names should have a '/' at the end.
688 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
689 tarinfo.name += "/"
690 return tarinfo
691
692 frombuf = classmethod(frombuf)
693
694 def tobuf(self):
695 """Return a tar header block as a 512 byte string.
696 """
697 name = self.name
698
699 # The following code was contributed by Detlef Lannert.
700 parts = []
701 for value, fieldsize in (
702 (name, 100),
703 ("%07o" % (self.mode & 07777), 8),
704 ("%07o" % self.uid, 8),
705 ("%07o" % self.gid, 8),
706 ("%011o" % self.size, 12),
707 ("%011o" % self.mtime, 12),
708 (" ", 8),
709 (self.type, 1),
710 (self.linkname, 100),
711 (MAGIC, 6),
712 (VERSION, 2),
713 (self.uname, 32),
714 (self.gname, 32),
715 ("%07o" % self.devmajor, 8),
716 ("%07o" % self.devminor, 8),
717 (self.prefix, 155)
718 ):
719 l = len(value)
720 parts.append(value + (fieldsize - l) * NUL)
721
722 buf = "".join(parts)
723 chksum = calc_chksum(buf)
724 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
725 buf += (BLOCKSIZE - len(buf)) * NUL
726 self.buf = buf
727 return buf
728
729 def isreg(self):
730 return self.type in REGULAR_TYPES
731 def isfile(self):
732 return self.isreg()
733 def isdir(self):
734 return self.type == DIRTYPE
735 def issym(self):
736 return self.type == SYMTYPE
737 def islnk(self):
738 return self.type == LNKTYPE
739 def ischr(self):
740 return self.type == CHRTYPE
741 def isblk(self):
742 return self.type == BLKTYPE
743 def isfifo(self):
744 return self.type == FIFOTYPE
745 def issparse(self):
746 return self.type == GNUTYPE_SPARSE
747 def isdev(self):
748 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
749# class TarInfo
750
751class TarFile(object):
752 """The TarFile Class provides an interface to tar archives.
753 """
754
755 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
756
757 dereference = False # If true, add content of linked file to the
758 # tar file, else the link.
759
760 ignore_zeros = False # If true, skips empty or invalid blocks and
761 # continues processing.
762
763 errorlevel = 0 # If 0, fatal errors only appear in debug
764 # messages (if debug >= 0). If > 0, errors
765 # are passed to the caller as exceptions.
766
767 posix = True # If True, generates POSIX.1-1990-compliant
768 # archives (no GNU extensions!)
769
770 fileobject = ExFileObject
771
772 def __init__(self, name=None, mode="r", fileobj=None):
773 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
774 read from an existing archive, 'a' to append data to an existing
775 file or 'w' to create a new file overwriting an existing one. `mode'
776 defaults to 'r'.
777 If `fileobj' is given, it is used for reading or writing data. If it
778 can be determined, `mode' is overridden by `fileobj's mode.
779 `fileobj' is not closed, when TarFile is closed.
780 """
781 self.name = name
782
783 if len(mode) > 1 or mode not in "raw":
784 raise ValueError, "mode must be 'r', 'a' or 'w'"
785 self._mode = mode
786 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
787
788 if not fileobj:
789 fileobj = file(self.name, self.mode)
790 self._extfileobj = False
791 else:
792 if self.name is None and hasattr(fileobj, "name"):
793 self.name = fileobj.name
794 if hasattr(fileobj, "mode"):
795 self.mode = fileobj.mode
796 self._extfileobj = True
797 self.fileobj = fileobj
798
799 # Init datastructures
800 self.closed = False
801 self.members = [] # list of members as TarInfo objects
802 self.membernames = [] # names of members
803 self.chunks = [0] # chunk cache
804 self._loaded = False # flag if all members have been read
805 self.offset = 0L # current position in the archive file
806 self.inodes = {} # dictionary caching the inodes of
807 # archive members already added
808
809 if self._mode == "r":
810 self.firstmember = None
811 self.firstmember = self.next()
812
813 if self._mode == "a":
814 # Move to the end of the archive,
815 # before the first empty block.
816 self.firstmember = None
817 while True:
818 try:
819 tarinfo = self.next()
820 except ReadError:
821 self.fileobj.seek(0)
822 break
823 if tarinfo is None:
824 self.fileobj.seek(- BLOCKSIZE, 1)
825 break
826
827 if self._mode in "aw":
828 self._loaded = True
829
830 #--------------------------------------------------------------------------
831 # Below are the classmethods which act as alternate constructors to the
832 # TarFile class. The open() method is the only one that is needed for
833 # public use; it is the "super"-constructor and is able to select an
834 # adequate "sub"-constructor for a particular compression using the mapping
835 # from OPEN_METH.
836 #
837 # This concept allows one to subclass TarFile without losing the comfort of
838 # the super-constructor. A sub-constructor is registered and made available
839 # by adding it to the mapping in OPEN_METH.
840
841 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
842 """Open a tar archive for reading, writing or appending. Return
843 an appropriate TarFile class.
844
845 mode:
846 'r' open for reading with transparent compression
847 'r:' open for reading exclusively uncompressed
848 'r:gz' open for reading with gzip compression
849 'r:bz2' open for reading with bzip2 compression
850 'a' or 'a:' open for appending
851 'w' or 'w:' open for writing without compression
852 'w:gz' open for writing with gzip compression
853 'w:bz2' open for writing with bzip2 compression
854 'r|' open an uncompressed stream of tar blocks for reading
855 'r|gz' open a gzip compressed stream of tar blocks
856 'r|bz2' open a bzip2 compressed stream of tar blocks
857 'w|' open an uncompressed stream for writing
858 'w|gz' open a gzip compressed stream for writing
859 'w|bz2' open a bzip2 compressed stream for writing
860 """
861
862 if not name and not fileobj:
863 raise ValueError, "nothing to open"
864
865 if ":" in mode:
866 filemode, comptype = mode.split(":", 1)
867 filemode = filemode or "r"
868 comptype = comptype or "tar"
869
870 # Select the *open() function according to
871 # given compression.
872 if comptype in cls.OPEN_METH:
873 func = getattr(cls, cls.OPEN_METH[comptype])
874 else:
875 raise CompressionError, "unknown compression type %r" % comptype
876 return func(name, filemode, fileobj)
877
878 elif "|" in mode:
879 filemode, comptype = mode.split("|", 1)
880 filemode = filemode or "r"
881 comptype = comptype or "tar"
882
883 if filemode not in "rw":
884 raise ValueError, "mode must be 'r' or 'w'"
885
886 t = cls(name, filemode,
887 _Stream(name, filemode, comptype, fileobj, bufsize))
888 t._extfileobj = False
889 return t
890
891 elif mode == "r":
892 # Find out which *open() is appropriate for opening the file.
893 for comptype in cls.OPEN_METH:
894 func = getattr(cls, cls.OPEN_METH[comptype])
895 try:
896 return func(name, "r", fileobj)
897 except (ReadError, CompressionError):
898 continue
899 raise ReadError, "file could not be opened successfully"
900
901 elif mode in "aw":
902 return cls.taropen(name, mode, fileobj)
903
904 raise ValueError, "undiscernible mode"
905
906 open = classmethod(open)
907
908 def taropen(cls, name, mode="r", fileobj=None):
909 """Open uncompressed tar archive name for reading or writing.
910 """
911 if len(mode) > 1 or mode not in "raw":
912 raise ValueError, "mode must be 'r', 'a' or 'w'"
913 return cls(name, mode, fileobj)
914
915 taropen = classmethod(taropen)
916
917 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
918 """Open gzip compressed tar archive name for reading or writing.
919 Appending is not allowed.
920 """
921 if len(mode) > 1 or mode not in "rw":
922 raise ValueError, "mode must be 'r' or 'w'"
923
924 try:
925 import gzip
926 except ImportError:
927 raise CompressionError, "gzip module is not available"
928
929 pre, ext = os.path.splitext(name)
930 pre = os.path.basename(pre)
931 if ext == ".tgz":
932 ext = ".tar"
933 if ext == ".gz":
934 ext = ""
935 tarname = pre + ext
936
937 if fileobj is None:
938 fileobj = file(name, mode + "b")
939
940 if mode != "r":
941 name = tarname
942
943 try:
944 t = cls.taropen(tarname, mode,
945 gzip.GzipFile(name, mode, compresslevel, fileobj)
946 )
947 except IOError:
948 raise ReadError, "not a gzip file"
949 t._extfileobj = False
950 return t
951
952 gzopen = classmethod(gzopen)
953
954 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
955 """Open bzip2 compressed tar archive name for reading or writing.
956 Appending is not allowed.
957 """
958 if len(mode) > 1 or mode not in "rw":
959 raise ValueError, "mode must be 'r' or 'w'."
960
961 try:
962 import bz2
963 except ImportError:
964 raise CompressionError, "bz2 module is not available"
965
966 pre, ext = os.path.splitext(name)
967 pre = os.path.basename(pre)
968 if ext == ".tbz2":
969 ext = ".tar"
970 if ext == ".bz2":
971 ext = ""
972 tarname = pre + ext
973
974 if fileobj is not None:
975 raise ValueError, "no support for external file objects"
976
977 try:
978 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
979 except IOError:
980 raise ReadError, "not a bzip2 file"
981 t._extfileobj = False
982 return t
983
984 bz2open = classmethod(bz2open)
985
986 # All *open() methods are registered here.
987 OPEN_METH = {
988 "tar": "taropen", # uncompressed tar
989 "gz": "gzopen", # gzip compressed tar
990 "bz2": "bz2open" # bzip2 compressed tar
991 }
992
993 #--------------------------------------------------------------------------
994 # The public methods which TarFile provides:
995
996 def close(self):
997 """Close the TarFile. In write-mode, two finishing zero blocks are
998 appended to the archive.
999 """
1000 if self.closed:
1001 return
1002
1003 if self._mode in "aw":
1004 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1005 self.offset += (BLOCKSIZE * 2)
1006 # fill up the end with zero-blocks
1007 # (like option -b20 for tar does)
1008 blocks, remainder = divmod(self.offset, RECORDSIZE)
1009 if remainder > 0:
1010 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1011
1012 if not self._extfileobj:
1013 self.fileobj.close()
1014 self.closed = True
1015
1016 def getmember(self, name):
1017 """Return a TarInfo object for member `name'. If `name' can not be
1018 found in the archive, KeyError is raised. If a member occurs more
1019 than once in the archive, its last occurence is assumed to be the
1020 most up-to-date version.
1021 """
1022 self._check()
1023 if name not in self.membernames and not self._loaded:
1024 self._load()
1025 if name not in self.membernames:
1026 raise KeyError, "filename %r not found" % name
1027 return self._getmember(name)
1028
1029 def getmembers(self):
1030 """Return the members of the archive as a list of TarInfo objects. The
1031 list has the same order as the members in the archive.
1032 """
1033 self._check()
1034 if not self._loaded: # if we want to obtain a list of
1035 self._load() # all members, we first have to
1036 # scan the whole archive.
1037 return self.members
1038
1039 def getnames(self):
1040 """Return the members of the archive as a list of their names. It has
1041 the same order as the list returned by getmembers().
1042 """
1043 self._check()
1044 if not self._loaded:
1045 self._load()
1046 return self.membernames
1047
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object that describes an existing file.

    The file is given either by its path `name' or by the open file
    object `fileobj' (examined with os.fstat on its file descriptor;
    its `name' attribute then replaces `name').  `arcname', if given,
    is stored in the archive instead of `name'.  The returned object
    may be modified before it is handed to addfile().  None is
    returned for file types that cannot be archived.
    """
    self._check("aw")

    # A file object takes precedence over an explicit `name'.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalized, without a drive part and
    # without leading slashes, so that it is always relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    arcname = arcname.lstrip("/")

    tarinfo = TarInfo()

    # fstat for file objects, lstat if links must not be followed (and
    # the platform provides it), plain stat in all other cases.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    linkname = ""
    mode = statres.st_mode
    if stat.S_ISREG(mode):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and inode in self.inodes:
            # Same inode as an already archived file: it is a hardlink.
            kind = LNKTYPE
            linkname = self.inodes[inode]
        else:
            kind = REGTYPE
            # Remember the inode only if it is valid;
            # on win32 it is always 0.
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(mode):
        kind = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(mode):
        kind = FIFOTYPE
    elif stat.S_ISLNK(mode):
        kind = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(mode):
        kind = CHRTYPE
    elif stat.S_ISBLK(mode):
        kind = BLKTYPE
    else:
        # Unsupported file type.
        return None

    # Copy everything the stat result tells us.
    tarinfo.name = arcname
    tarinfo.mode = mode
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = linkname

    # Symbolic owner names are optional: ids unknown to the password
    # or group database simply leave uname/gname untouched.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if kind in (CHRTYPE, BLKTYPE) \
       and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1143
1144 def list(self, verbose=True):
1145 """Print a table of contents to sys.stdout. If `verbose' is False, only
1146 the names of the members are printed. If it is True, an `ls -l'-like
1147 output is produced.
1148 """
1149 self._check()
1150
1151 for tarinfo in self:
1152 if verbose:
1153 print filemode(tarinfo.mode),
1154 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1155 tarinfo.gname or tarinfo.gid),
1156 if tarinfo.ischr() or tarinfo.isblk():
1157 print "%10s" % ("%d,%d" \
1158 % (tarinfo.devmajor, tarinfo.devminor)),
1159 else:
1160 print "%10d" % tarinfo.size,
1161 print "%d-%02d-%02d %02d:%02d:%02d" \
1162 % time.localtime(tarinfo.mtime)[:6],
1163
1164 print tarinfo.name,
1165
1166 if verbose:
1167 if tarinfo.issym():
1168 print "->", tarinfo.linkname,
1169 if tarinfo.islnk():
1170 print "link to", tarinfo.linkname,
1171 print
1172
1173 def add(self, name, arcname=None, recursive=True):
1174 """Add the file `name' to the archive. `name' may be any type of file
1175 (directory, fifo, symbolic link, etc.). If given, `arcname'
1176 specifies an alternative name for the file in the archive.
1177 Directories are added recursively by default. This can be avoided by
1178 setting `recursive' to False.
1179 """
1180 self._check("aw")
1181
1182 if arcname is None:
1183 arcname = name
1184
1185 # Skip if somebody tries to archive the archive...
1186 if self.name is not None \
1187 and os.path.abspath(name) == os.path.abspath(self.name):
1188 self._dbg(2, "tarfile: Skipped %r" % name)
1189 return
1190
1191 # Special case: The user wants to add the current
1192 # working directory.
1193 if name == ".":
1194 if recursive:
1195 if arcname == ".":
1196 arcname = ""
1197 for f in os.listdir("."):
1198 self.add(f, os.path.join(arcname, f))
1199 return
1200
1201 self._dbg(1, name)
1202
1203 # Create a TarInfo object from the file.
1204 tarinfo = self.gettarinfo(name, arcname)
1205
1206 if tarinfo is None:
1207 self._dbg(1, "tarfile: Unsupported type %r" % name)
1208 return
1209
1210 # Append the tar header and data to the archive.
1211 if tarinfo.isreg():
1212 f = file(name, "rb")
1213 self.addfile(tarinfo, f)
1214 f.close()
1215
1216 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1217 tarinfo.size = 0L
1218 self.addfile(tarinfo)
1219
1220 if tarinfo.isdir():
1221 self.addfile(tarinfo)
1222 if recursive:
1223 for f in os.listdir(name):
1224 self.add(os.path.join(name, f), os.path.join(arcname, f))
1225
def addfile(self, tarinfo, fileobj=None):
    """Write the member described by `tarinfo' to the archive.

    If `fileobj' is given, tarinfo.size bytes are read from it and
    stored as the member's data.  TarInfo objects can be created with
    gettarinfo().  On Windows platforms `fileobj' should always be
    opened in mode 'rb' to avoid irritation about the file size.

    ValueError is raised if the member is larger than MAXSIZE_MEMBER,
    or, in posix mode, if its name or linkname does not fit into the
    header.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name = tarinfo.name + "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        raise ValueError("file is too large (>8GB)")

    target = tarinfo.linkname
    if len(target) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # GNU mode: the full linkname goes into an extra member, the
        # header itself only keeps a truncated copy.
        self._create_gnulong(target, GNUTYPE_LONGLINK)
        tarinfo.linkname = target[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if not self.posix:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
        else:
            # Posix mode: split the name at the last "/" found within
            # the first LENGTH_PREFIX + 1 characters.  The part before
            # that slash becomes the prefix, the rest the name.
            cut = tarinfo.name.rfind("/", 0, LENGTH_PREFIX + 1) + 1
            name = tarinfo.name[cut:]
            prefix = tarinfo.name[:cut][:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad the last block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, leftover = divmod(tarinfo.size, BLOCKSIZE)
        if leftover > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
1290
def extract(self, member, path=""):
    """Extract a member from the archive to the current working
    directory, using its full name.

    `member' may be a filename or a TarInfo object.  A different target
    directory can be chosen with `path'.  File information is restored
    as accurately as possible.  Depending on self.errorlevel, errors
    are either raised or only reported through the debug output.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(member)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        # Operating system errors are fatal from errorlevel 1 on.
        if self.errorlevel > 0:
            raise
        e = sys.exc_info()[1]
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError:
        # ExtractErrors are fatal from errorlevel 2 on.
        if self.errorlevel > 1:
            raise
        e = sys.exc_info()[1]
        self._dbg(1, "tarfile: %s" % e)
1319
def extractfile(self, member):
    """Return a member's data as a file-like object.

    `member' may be a filename or a TarInfo object.  For a regular
    file (or a member of unknown type) a file-like object is returned.
    For a link, the file-like object of the link's target is returned.
    For anything else (directory, device, ...) the result is None.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(member)

    # Members of unknown type are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # No data is associated with the member (directory, chrdev,
        # blkdev, etc.), so there is nothing to read.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
1358
1359 def _extract_member(self, tarinfo, targetpath):
1360 """Extract the TarInfo object tarinfo to a physical
1361 file called targetpath.
1362 """
1363 # Fetch the TarInfo object for the given name
1364 # and build the destination pathname, replacing
1365 # forward slashes to platform specific separators.
1366 if targetpath[-1:] == "/":
1367 targetpath = targetpath[:-1]
1368 targetpath = os.path.normpath(targetpath)
1369
1370 # Create all upper directories.
1371 upperdirs = os.path.dirname(targetpath)
1372 if upperdirs and not os.path.exists(upperdirs):
1373 ti = TarInfo()
1374 ti.name = upperdirs
1375 ti.type = DIRTYPE
1376 ti.mode = 0777
1377 ti.mtime = tarinfo.mtime
1378 ti.uid = tarinfo.uid
1379 ti.gid = tarinfo.gid
1380 ti.uname = tarinfo.uname
1381 ti.gname = tarinfo.gname
1382 try:
1383 self._extract_member(ti, ti.name)
1384 except:
1385 pass
1386
1387 if tarinfo.islnk() or tarinfo.issym():
1388 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1389 else:
1390 self._dbg(1, tarinfo.name)
1391
1392 if tarinfo.isreg():
1393 self.makefile(tarinfo, targetpath)
1394 elif tarinfo.isdir():
1395 self.makedir(tarinfo, targetpath)
1396 elif tarinfo.isfifo():
1397 self.makefifo(tarinfo, targetpath)
1398 elif tarinfo.ischr() or tarinfo.isblk():
1399 self.makedev(tarinfo, targetpath)
1400 elif tarinfo.islnk() or tarinfo.issym():
1401 self.makelink(tarinfo, targetpath)
1402 elif tarinfo.type not in SUPPORTED_TYPES:
1403 self.makeunknown(tarinfo, targetpath)
1404 else:
1405 self.makefile(tarinfo, targetpath)
1406
1407 self.chown(tarinfo, targetpath)
1408 if not tarinfo.issym():
1409 self.chmod(tarinfo, targetpath)
1410 self.utime(tarinfo, targetpath)
1411
#--------------------------------------------------------------------------
# Below are the different file methods. They are called via
# _extract_member() when extract() is called. They can be replaced in a
# subclass to implement other functionality.
1416
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'.

    A directory that already exists is not treated as an error; every
    other failure of os.mkdir is passed on to the caller.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno == errno.EEXIST:
            return
        raise
1425
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to the file `targetpath'.

    Both the member's file object and the target file are closed even
    if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1434
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath'.

    The member is written like a regular file and a debug message
    (level 1) that names the unknown type is emitted afterwards.
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, note % tarinfo.type)
1442
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'.

    ExtractError is raised on platforms without os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1450
def makedev(self, tarinfo, targetpath):
    """Create a character or block device called `targetpath'.

    ExtractError is raised if the platform lacks os.mknod or
    os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode with the matching file type bit.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1465
def makelink(self, tarinfo, targetpath):
    """Create a (symbolic) link called `targetpath'.

    If the platform has no os.symlink / os.link, a copy of the
    referenced file is made instead: first from the archive member
    with that name, and if that fails, from the file system.  IOError
    is raised if that copy fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        # Looking up a missing os function raises the AttributeError
        # that triggers the fallback below.
        if tarinfo.issym():
            create = os.symlink
        else:
            create = os.link
        create(linkpath, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # Resolve the link target against the directory of the
            # link itself.
            base = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(base, linkpath))

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1491
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

    Nothing happens unless the pwd module is available and the process
    runs with effective uid 0.  Names are preferred over numeric ids;
    if neither is known to the system, the ids of the current process
    are used.  ExtractError is raised if the owner cannot be changed.
    """
    # We have to be root to change owners.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1519
def chmod(self, tarinfo, targetpath):
    """Set the file permissions of `targetpath' to tarinfo.mode.

    Does nothing on platforms without os.chmod.  ExtractError is
    raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001528
def utime(self, tarinfo, targetpath):
    """Set the modification time of `targetpath' to tarinfo.mtime.

    Access time and modification time are both set to that value.
    Does nothing on platforms without os.utime.  ExtractError is
    raised if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1542
1543 #--------------------------------------------------------------------------
1544
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading.  Return None if there is no more
    available.

    ReadError is raised if already the first block of the file cannot
    be interpreted as a tar header.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block, starting where the last member ended.
    self.fileobj.seek(self.chunks[-1])
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the block and keep looking for a valid header.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.chunks[-1] == 0:
                    # If already the first block is invalid, this does
                    # not look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        tarinfo = self.TYPE_METH[tarinfo.type](self, tarinfo)
    else:
        tarinfo.offset_data = self.offset
        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and store directories
    # as regular files whose name ends with a slash.  name[-1:] is the
    # last character (the former name[:-1] was everything but the last
    # character, so the check practically never matched).
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    self.membernames.append(tarinfo.name)
    self.chunks.append(self.offset)
    return tarinfo
1613
#--------------------------------------------------------------------------
# Below are some methods which are called for special typeflags in the
# next() method, e.g. for unwrapping GNU longname/longlink blocks. They
# are registered in TYPE_METH below. You can register your own methods
# with this mapping.
# A registered method is called with a TarInfo object as its only argument.
#
# During its execution the method MUST perform the following tasks:
# 1. set tarinfo.offset_data to the position where the data blocks begin,
#    if there is data to follow.
# 2. set self.offset to the position where the next member's header will
#    begin.
# 3. return a valid TarInfo object.
1627
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname or longlink member.

    The data blocks of `tarinfo' contain the long name.  The header
    that follows them describes the real member, which is returned
    with the long name (or linkname) filled in.
    """
    kind = tarinfo.type

    # Collect the data blocks that carry the long name.
    pieces = []
    remaining = tarinfo.size
    while remaining > 0:
        pieces.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longvalue = "".join(pieces)

    name = None
    linkname = None
    if kind == GNUTYPE_LONGNAME:
        name = nts(longvalue)
    if kind == GNUTYPE_LONGLINK:
        linkname = nts(longvalue)

    # The next block is the header of the member the name belongs to.
    member = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    member.offset = self.offset
    self.offset += BLOCKSIZE
    member.offset_data = self.offset
    member.name = name or member.name
    member.linkname = linkname or member.linkname

    if member.isreg() or member.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(member.size)
    return member
1660
1661 def proc_sparse(self, tarinfo):
1662 """Analyze a GNU sparse header plus extra headers.
1663 """
1664 buf = tarinfo.tobuf()
1665 sp = _ringbuffer()
1666 pos = 386
1667 lastpos = 0L
1668 realpos = 0L
1669 # There are 4 possible sparse structs in the
1670 # first header.
1671 for i in xrange(4):
1672 try:
1673 offset = int(buf[pos:pos + 12], 8)
1674 numbytes = int(buf[pos + 12:pos + 24], 8)
1675 except ValueError:
1676 break
1677 if offset > lastpos:
1678 sp.append(_hole(lastpos, offset - lastpos))
1679 sp.append(_data(offset, numbytes, realpos))
1680 realpos += numbytes
1681 lastpos = offset + numbytes
1682 pos += 24
1683
1684 isextended = ord(buf[482])
1685 origsize = int(buf[483:495], 8)
1686
1687 # If the isextended flag is given,
1688 # there are extra headers to process.
1689 while isextended == 1:
1690 buf = self.fileobj.read(BLOCKSIZE)
1691 self.offset += BLOCKSIZE
1692 pos = 0
1693 for i in xrange(21):
1694 try:
1695 offset = int(buf[pos:pos + 12], 8)
1696 numbytes = int(buf[pos + 12:pos + 24], 8)
1697 except ValueError:
1698 break
1699 if offset > lastpos:
1700 sp.append(_hole(lastpos, offset - lastpos))
1701 sp.append(_data(offset, numbytes, realpos))
1702 realpos += numbytes
1703 lastpos = offset + numbytes
1704 pos += 24
1705 isextended = ord(buf[504])
1706
1707 if lastpos < origsize:
1708 sp.append(_hole(lastpos, origsize - lastpos))
1709
1710 tarinfo.sparse = sp
1711
1712 tarinfo.offset_data = self.offset
1713 self.offset += self._block(tarinfo.size)
1714 tarinfo.size = origsize
1715 return tarinfo
1716
# Dispatch table used by next(): it maps a typeflag (a single character
# string) to the method that next() calls with the freshly read TarInfo
# object whenever it meets a header with that typeflag.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE: proc_sparse,
}
1725
1726 #--------------------------------------------------------------------------
1727 # Little helper methods:
1728
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
    e.g. _block(834) => 1024.
    """
    whole, rest = divmod(count, BLOCKSIZE)
    if not rest:
        return whole * BLOCKSIZE
    # A partially filled block still occupies a full one.
    return (whole + 1) * BLOCKSIZE
1737
1738 def _getmember(self, name, tarinfo=None):
1739 """Find an archive member by name from bottom to top.
1740 If tarinfo is given, it is used as the starting point.
1741 """
1742 if tarinfo is None:
1743 end = len(self.members)
1744 else:
1745 end = self.members.index(tarinfo)
1746
1747 for i in xrange(end - 1, -1, -1):
1748 if name == self.membernames[i]:
1749 return self.members[i]
1750
1751 def _load(self):
1752 """Read through the entire archive file and look for readable
1753 members.
1754 """
1755 while True:
1756 tarinfo = self.next()
1757 if tarinfo is None:
1758 break
1759 self._loaded = True
1760
1761 def _check(self, mode=None):
1762 """Check if TarFile is still open, and if the operation's mode
1763 corresponds to TarFile's mode.
1764 """
1765 if self.closed:
1766 raise IOError, "%s is closed" % self.__class__.__name__
1767 if mode is not None and self._mode not in mode:
1768 raise IOError, "bad operation for mode %r" % self._mode
1769
1770 def __iter__(self):
1771 """Provide an iterator object.
1772 """
1773 if self._loaded:
1774 return iter(self.members)
1775 else:
1776 return TarIter(self)
1777
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

    It consists of an extended tar header, with the length of the
    longname as size, followed by data blocks, which contain the
    longname as a null terminated string.  `type' is the typeflag of
    the extended header (GNUTYPE_LONGNAME or GNUTYPE_LONGLINK).
    """
    # The terminating NUL is part of the stored data and of its size.
    name += NUL

    tarinfo = TarInfo()
    tarinfo.name = "././@LongLink"
    tarinfo.type = type
    tarinfo.mode = 0
    tarinfo.size = len(name)

    # write extended header; like every header written by addfile()
    # it occupies one block that has to be reflected in self.offset.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE
    # write name blocks, padded to a full block
    self.fileobj.write(name)
    blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
    if remainder > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - remainder))
        blocks += 1
    self.offset += blocks * BLOCKSIZE
1799
def _dbg(self, level, msg):
    """Write `msg' to sys.stderr if the debug level of the TarFile
    (self.debug) is at least `level'.
    """
    if self.debug >= level:
        print >> sys.stderr, msg
1805# class TarFile
1806
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the TarFile `tarfile'."""
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator object itself."""
        return self

    def next(self):
        """Return the next member using TarFile's next() method.

        Once next() yields nothing anymore, the TarFile is marked as
        _loaded and the iteration stops.
        """
        tarinfo = self.tarfile.next()
        if tarinfo:
            return tarinfo
        self.tarfile._loaded = True
        raise StopIteration
1831
1832# Helper classes for sparse file support
1833class _section:
1834 """Base class for _data and _hole.
1835 """
1836 def __init__(self, offset, size):
1837 self.offset = offset
1838 self.size = size
1839 def __contains__(self, offset):
1840 return self.offset <= offset < self.offset + self.size
1841
class _data(_section):
    """Represent a data section in a sparse file.

    In addition to offset and size, `realpos' is stored; proc_sparse()
    passes the running total of the sizes of all preceding data
    sections for it.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1848
class _hole(_section):
    """Represent a hole section in a sparse file.

    A hole has no state beyond the offset and size of a _section.
    """
1853
1854class _ringbuffer(list):
1855 """Ringbuffer class which increases performance
1856 over a regular list.
1857 """
1858 def __init__(self):
1859 self.idx = 0
1860 def find(self, offset):
1861 idx = self.idx
1862 while True:
1863 item = self[idx]
1864 if offset in item:
1865 break
1866 idx += 1
1867 if idx == len(self):
1868 idx = 0
1869 if idx == self.idx:
1870 # End of File
1871 return None
1872 self.idx = idx
1873 return item
1874
1875#---------------------------------------------
1876# zipfile compatible TarFile class
1877#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED

class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
    ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Choose the TarFile constructor that fits the compression.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            # Give every member the attribute names of a ZipInfo.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents using TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return None

    def getinfo(self, name):
        """Return the member called `name'."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called `name'."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive; `compress_type'
        is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member described by `zinfo'.

        The ZipInfo-style attributes filename, file_size and date_time
        of `zinfo' are copied to name, size and mtime before it is
        passed to TarFile.addfile().
        """
        import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
1923
1924#--------------------
1925# exported functions
1926#--------------------
def is_tarfile(name):
    """Return True if `name' points to a tar archive that we are able
    to handle, else return False.

    The check consists of opening and closing the archive; only a
    TarError is interpreted as "not a tar archive", other exceptions
    are passed on to the caller.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1937
# Module-level alias for TarFile.open.  Note that it shadows the builtin
# open() inside this module; is_tarfile() calls it through this name.
open = TarFile.open