blob: b85f117c976809f7f8f2df138a4f490d2b11ee91 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL        = "\0"               # the null character
BLOCKSIZE  = 512                # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

LENGTH_NAME    = 100            # maximum length of a filename
LENGTH_LINK    = 100            # maximum length of a linkname
LENGTH_PREFIX  = 155            # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file that still fits
                                # into the size field (11 octal digits)

# Values of the one-character type field of a header block.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file (type field is a NUL byte)
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
95
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
                                                # (tested by TarInfo.isreg())
107
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
# filemode() uses them to render a mode as "-rwxrwxrwx".
#---------------------------------------------------------
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (rendered as "t"/"T" by filemode())

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    The result is everything in front of the first NUL character; if
    the buffer contains no NUL at all it is returned unchanged.
    Bytes that follow the terminator are padding (or leftover garbage
    written by other tar implementations) and must not become part of
    the string, which is why a plain rstrip() is not sufficient.
    """
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Return the checksum of a member's header block.

    buf is the 512 byte long string buffer holding the header.  The
    checksum is the plain sum of all byte values, where the eight
    bytes of the chksum field itself (offsets 148 to 155) are counted
    as if they were spaces.
    """
    # 8 * ord(" ") stands in for the chksum field.
    blank_field = 256
    before_field = sum(map(ord, buf[:148]))
    after_field = sum(map(ord, buf[156:]))
    return blank_field + before_field + after_field
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
    If length is None, copy the entire content.

    IOError is raised if src is exhausted before length bytes
    could be copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(count):
        # Move exactly `count' bytes; a short read means that
        # src ended prematurely.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    copied = 0
    while copied < full_chunks:
        transfer(chunk_size)
        copied += 1

    if tail != 0:
        transfer(tail)
    return
175
# Lookup table for filemode().  It holds one entry per character of the
# resulting string (file type, then read/write/execute for owner, group
# and other).  Each entry lists (bits, character) pairs in order of
# priority: the first pair whose bits are all set in the mode supplies
# the character, and "-" is used if none of them matches.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000202
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # The first candidate whose bits are all set in mode wins;
        # without any match the position is rendered as "-".
        symbol = "-"
        for bit, char in candidates:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000217
def normpath(path):
    """Return path normalized by os.path.normpath(), using "/" as
    the separator regardless of the platform's own os.sep.
    """
    path = os.path.normpath(path)
    if os.sep != "/":
        path = path.replace(os.sep, "/")
    return path
222
class TarError(Exception):
    """Base exception.  All other exceptions defined here derive from it."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
238
239#---------------------------
240# internal stream interface
241#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file `name' with os.open().

           `mode' is "r" to read an existing file or "w" to create
           (or truncate) a file for writing.  Any other value raises
           KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is only defined on platforms that distinguish
        # between text and binary files.
        flags |= getattr(os, "O_BINARY", 0)
        # Request rw-rw-rw- (subject to the umask) for new files.
        # Without an explicit permission argument os.open() uses 0777,
        # which would create archives with the executable bits set.
        perm = (stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IWGRP |
                stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perm)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to `size' bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string `s' to the file descriptor."""
        os.write(self.fd, s)
265
266class _Stream:
267 """Class that serves as an adapter between TarFile and
268 a stream-like object. The stream-like object only
269 needs to have a read() or write() method and is accessed
270 blockwise. Use of gzip or bzip2 compression is possible.
271 A stream-like object could be for example: sys.stdin,
272 sys.stdout, a socket, a tape device etc.
273
274 _Stream is intended to be used only internally.
275 """
276
277 def __init__(self, name, mode, type, fileobj, bufsize):
278 """Construct a _Stream object.
279 """
280 self._extfileobj = True
281 if fileobj is None:
282 fileobj = _LowLevelFile(name, mode)
283 self._extfileobj = False
284
285 self.name = name or ""
286 self.mode = mode
287 self.type = type
288 self.fileobj = fileobj
289 self.bufsize = bufsize
290 self.buf = ""
291 self.pos = 0L
292 self.closed = False
293
294 if type == "gz":
295 try:
296 import zlib
297 except ImportError:
298 raise CompressionError, "zlib module is not available"
299 self.zlib = zlib
300 self.crc = zlib.crc32("")
301 if mode == "r":
302 self._init_read_gz()
303 else:
304 self._init_write_gz()
305
306 if type == "bz2":
307 try:
308 import bz2
309 except ImportError:
310 raise CompressionError, "bz2 module is not available"
311 if mode == "r":
312 self.dbuf = ""
313 self.cmp = bz2.BZ2Decompressor()
314 else:
315 self.cmp = bz2.BZ2Compressor()
316
317 def __del__(self):
318 if not self.closed:
319 self.close()
320
321 def _init_write_gz(self):
322 """Initialize for writing with gzip compression.
323 """
324 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
325 -self.zlib.MAX_WBITS,
326 self.zlib.DEF_MEM_LEVEL,
327 0)
328 timestamp = struct.pack("<L", long(time.time()))
329 self.__write("\037\213\010\010%s\002\377" % timestamp)
330 if self.name.endswith(".gz"):
331 self.name = self.name[:-3]
332 self.__write(self.name + NUL)
333
334 def write(self, s):
335 """Write string s to the stream.
336 """
337 if self.type == "gz":
338 self.crc = self.zlib.crc32(s, self.crc)
339 self.pos += len(s)
340 if self.type != "tar":
341 s = self.cmp.compress(s)
342 self.__write(s)
343
344 def __write(self, s):
345 """Write string s to the stream if a whole new block
346 is ready to be written.
347 """
348 self.buf += s
349 while len(self.buf) > self.bufsize:
350 self.fileobj.write(self.buf[:self.bufsize])
351 self.buf = self.buf[self.bufsize:]
352
353 def close(self):
354 """Close the _Stream object. No operation should be
355 done on it afterwards.
356 """
357 if self.closed:
358 return
359
Martin v. Löwisc234a522004-08-22 21:28:33 +0000360 if self.mode == "w" and self.type != "tar":
361 self.buf += self.cmp.flush()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000362 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000363 self.fileobj.write(self.buf)
364 self.buf = ""
365 if self.type == "gz":
366 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000367 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000368
369 if not self._extfileobj:
370 self.fileobj.close()
371
372 self.closed = True
373
374 def _init_read_gz(self):
375 """Initialize for reading a gzip compressed fileobj.
376 """
377 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
378 self.dbuf = ""
379
380 # taken from gzip.GzipFile with some alterations
381 if self.__read(2) != "\037\213":
382 raise ReadError, "not a gzip file"
383 if self.__read(1) != "\010":
384 raise CompressionError, "unsupported compression method"
385
386 flag = ord(self.__read(1))
387 self.__read(6)
388
389 if flag & 4:
390 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
391 self.read(xlen)
392 if flag & 8:
393 while True:
394 s = self.__read(1)
395 if not s or s == NUL:
396 break
397 if flag & 16:
398 while True:
399 s = self.__read(1)
400 if not s or s == NUL:
401 break
402 if flag & 2:
403 self.__read(2)
404
405 def tell(self):
406 """Return the stream's file pointer position.
407 """
408 return self.pos
409
410 def seek(self, pos=0):
411 """Set the stream's file pointer to pos. Negative seeking
412 is forbidden.
413 """
414 if pos - self.pos >= 0:
415 blocks, remainder = divmod(pos - self.pos, self.bufsize)
416 for i in xrange(blocks):
417 self.read(self.bufsize)
418 self.read(remainder)
419 else:
420 raise StreamError, "seeking backwards is not allowed"
421 return self.pos
422
423 def read(self, size=None):
424 """Return the next size number of bytes from the stream.
425 If size is not defined, return all bytes of the stream
426 up to EOF.
427 """
428 if size is None:
429 t = []
430 while True:
431 buf = self._read(self.bufsize)
432 if not buf:
433 break
434 t.append(buf)
435 buf = "".join(t)
436 else:
437 buf = self._read(size)
438 self.pos += len(buf)
439 return buf
440
441 def _read(self, size):
442 """Return size bytes from the stream.
443 """
444 if self.type == "tar":
445 return self.__read(size)
446
447 c = len(self.dbuf)
448 t = [self.dbuf]
449 while c < size:
450 buf = self.__read(self.bufsize)
451 if not buf:
452 break
453 buf = self.cmp.decompress(buf)
454 t.append(buf)
455 c += len(buf)
456 t = "".join(t)
457 self.dbuf = t[size:]
458 return t[:size]
459
460 def __read(self, size):
461 """Return size bytes from stream. If internal buffer is empty,
462 read another block from the stream.
463 """
464 c = len(self.buf)
465 t = [self.buf]
466 while c < size:
467 buf = self.fileobj.read(self.bufsize)
468 if not buf:
469 break
470 t.append(buf)
471 c += len(buf)
472 t = "".join(t)
473 self.buf = t[size:]
474 return t[:size]
475# class _Stream
476
477#------------------------
478# Extraction file object
479#------------------------
480class ExFileObject(object):
481 """File-like object for reading an archive member.
482 Is returned by TarFile.extractfile(). Support for
483 sparse files included.
484 """
485
486 def __init__(self, tarfile, tarinfo):
487 self.fileobj = tarfile.fileobj
488 self.name = tarinfo.name
489 self.mode = "r"
490 self.closed = False
491 self.offset = tarinfo.offset_data
492 self.size = tarinfo.size
493 self.pos = 0L
494 self.linebuffer = ""
495 if tarinfo.issparse():
496 self.sparse = tarinfo.sparse
497 self.read = self._readsparse
498 else:
499 self.read = self._readnormal
500
501 def __read(self, size):
502 """Overloadable read method.
503 """
504 return self.fileobj.read(size)
505
506 def readline(self, size=-1):
507 """Read a line with approx. size. If size is negative,
508 read a whole line. readline() and read() must not
509 be mixed up (!).
510 """
511 if size < 0:
512 size = sys.maxint
513
514 nl = self.linebuffer.find("\n")
515 if nl >= 0:
516 nl = min(nl, size)
517 else:
518 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000519 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000520 buf = self.read(min(size, 100))
521 if not buf:
522 break
523 self.linebuffer += buf
524 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000525 nl = self.linebuffer.find("\n")
526 if nl == -1:
527 s = self.linebuffer
528 self.linebuffer = ""
529 return s
530 buf = self.linebuffer[:nl]
531 self.linebuffer = self.linebuffer[nl + 1:]
532 while buf[-1:] == "\r":
533 buf = buf[:-1]
534 return buf + "\n"
535
536 def readlines(self):
537 """Return a list with all (following) lines.
538 """
539 result = []
540 while True:
541 line = self.readline()
542 if not line: break
543 result.append(line)
544 return result
545
546 def _readnormal(self, size=None):
547 """Read operation for regular files.
548 """
549 if self.closed:
550 raise ValueError, "file is closed"
551 self.fileobj.seek(self.offset + self.pos)
552 bytesleft = self.size - self.pos
553 if size is None:
554 bytestoread = bytesleft
555 else:
556 bytestoread = min(size, bytesleft)
557 self.pos += bytestoread
558 return self.__read(bytestoread)
559
560 def _readsparse(self, size=None):
561 """Read operation for sparse files.
562 """
563 if self.closed:
564 raise ValueError, "file is closed"
565
566 if size is None:
567 size = self.size - self.pos
568
569 data = []
570 while size > 0:
571 buf = self._readsparsesection(size)
572 if not buf:
573 break
574 size -= len(buf)
575 data.append(buf)
576 return "".join(data)
577
578 def _readsparsesection(self, size):
579 """Read a single section of a sparse file.
580 """
581 section = self.sparse.find(self.pos)
582
583 if section is None:
584 return ""
585
586 toread = min(size, section.offset + section.size - self.pos)
587 if isinstance(section, _data):
588 realpos = section.realpos + self.pos - section.offset
589 self.pos += toread
590 self.fileobj.seek(self.offset + realpos)
591 return self.__read(toread)
592 else:
593 self.pos += toread
594 return NUL * toread
595
596 def tell(self):
597 """Return the current file position.
598 """
599 return self.pos
600
601 def seek(self, pos, whence=0):
602 """Seek to a position in the file.
603 """
604 self.linebuffer = ""
605 if whence == 0:
606 self.pos = min(max(pos, 0), self.size)
607 if whence == 1:
608 if pos < 0:
609 self.pos = max(self.pos + pos, 0)
610 else:
611 self.pos = min(self.pos + pos, self.size)
612 if whence == 2:
613 self.pos = max(min(self.size + pos, self.size), 0)
614
615 def close(self):
616 """Close the file object.
617 """
618 self.closed = True
619#class ExFileObject
620
621#------------------
622# Exported Classes
623#------------------
624class TarInfo(object):
625 """Informational class which holds the details about an
626 archive member given by a tar header block.
627 TarInfo objects are returned by TarFile.getmember(),
628 TarFile.getmembers() and TarFile.gettarinfo() and are
629 usually created internally.
630 """
631
632 def __init__(self, name=""):
633 """Construct a TarInfo object. name is the optional name
634 of the member.
635 """
636
637 self.name = name # member name (dirnames must end with '/')
638 self.mode = 0666 # file permissions
639 self.uid = 0 # user id
640 self.gid = 0 # group id
641 self.size = 0 # file size
642 self.mtime = 0 # modification time
643 self.chksum = 0 # header checksum
644 self.type = REGTYPE # member type
645 self.linkname = "" # link name
646 self.uname = "user" # user name
647 self.gname = "group" # group name
648 self.devmajor = 0 #-
649 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
650 self.prefix = "" # prefix to filename or holding information
651 # about sparse files
652
653 self.offset = 0 # the tar header starts here
654 self.offset_data = 0 # the file's data starts here
655
656 def __repr__(self):
657 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
658
659 def frombuf(cls, buf):
660 """Construct a TarInfo object from a 512 byte string buffer.
661 """
662 tarinfo = cls()
Neal Norwitzd96d1012004-07-20 22:23:02 +0000663 tarinfo.name = nts(buf[0:100])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000664 tarinfo.mode = int(buf[100:108], 8)
665 tarinfo.uid = int(buf[108:116],8)
666 tarinfo.gid = int(buf[116:124],8)
Neal Norwitzd96d1012004-07-20 22:23:02 +0000667
668 # There are two possible codings for the size field we
669 # have to discriminate, see comment in tobuf() below.
670 if buf[124] != chr(0200):
671 tarinfo.size = long(buf[124:136], 8)
672 else:
673 tarinfo.size = 0L
674 for i in range(11):
675 tarinfo.size <<= 8
676 tarinfo.size += ord(buf[125 + i])
677
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000678 tarinfo.mtime = long(buf[136:148], 8)
679 tarinfo.chksum = int(buf[148:156], 8)
680 tarinfo.type = buf[156:157]
681 tarinfo.linkname = nts(buf[157:257])
682 tarinfo.uname = nts(buf[265:297])
683 tarinfo.gname = nts(buf[297:329])
684 try:
685 tarinfo.devmajor = int(buf[329:337], 8)
686 tarinfo.devminor = int(buf[337:345], 8)
687 except ValueError:
688 tarinfo.devmajor = tarinfo.devmajor = 0
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000689 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000690
691 # The prefix field is used for filenames > 100 in
692 # the POSIX standard.
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000693 # name = prefix + '/' + name
694 if tarinfo.type != GNUTYPE_SPARSE:
695 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000696
697 # Directory names should have a '/' at the end.
698 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
699 tarinfo.name += "/"
700 return tarinfo
701
702 frombuf = classmethod(frombuf)
703
704 def tobuf(self):
705 """Return a tar header block as a 512 byte string.
706 """
Neal Norwitzd96d1012004-07-20 22:23:02 +0000707 # Prefer the size to be encoded as 11 octal ascii digits
708 # which is the most portable. If the size exceeds this
709 # limit (>= 8 GB), encode it as an 88-bit value which is
710 # a GNU tar feature.
711 if self.size <= MAXSIZE_MEMBER:
712 size = "%011o" % self.size
713 else:
714 s = self.size
715 size = ""
716 for i in range(11):
717 size = chr(s & 0377) + size
718 s >>= 8
719 size = chr(0200) + size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000720
721 # The following code was contributed by Detlef Lannert.
722 parts = []
723 for value, fieldsize in (
Neal Norwitzd96d1012004-07-20 22:23:02 +0000724 (self.name, 100),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000725 ("%07o" % (self.mode & 07777), 8),
726 ("%07o" % self.uid, 8),
727 ("%07o" % self.gid, 8),
Neal Norwitzd96d1012004-07-20 22:23:02 +0000728 (size, 12),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000729 ("%011o" % self.mtime, 12),
730 (" ", 8),
731 (self.type, 1),
732 (self.linkname, 100),
733 (MAGIC, 6),
734 (VERSION, 2),
735 (self.uname, 32),
736 (self.gname, 32),
737 ("%07o" % self.devmajor, 8),
738 ("%07o" % self.devminor, 8),
739 (self.prefix, 155)
740 ):
741 l = len(value)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000742 parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000743
744 buf = "".join(parts)
745 chksum = calc_chksum(buf)
746 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
747 buf += (BLOCKSIZE - len(buf)) * NUL
748 self.buf = buf
749 return buf
750
751 def isreg(self):
752 return self.type in REGULAR_TYPES
753 def isfile(self):
754 return self.isreg()
755 def isdir(self):
756 return self.type == DIRTYPE
757 def issym(self):
758 return self.type == SYMTYPE
759 def islnk(self):
760 return self.type == LNKTYPE
761 def ischr(self):
762 return self.type == CHRTYPE
763 def isblk(self):
764 return self.type == BLKTYPE
765 def isfifo(self):
766 return self.type == FIFOTYPE
767 def issparse(self):
768 return self.type == GNUTYPE_SPARSE
769 def isdev(self):
770 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
771# class TarInfo
772
773class TarFile(object):
774 """The TarFile Class provides an interface to tar archives.
775 """
776
    # Class-level defaults; they may be overridden per class or
    # per instance.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    posix = False               # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)

    fileobject = ExFileObject   # file-like class for reading a member
793
794 def __init__(self, name=None, mode="r", fileobj=None):
795 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
796 read from an existing archive, 'a' to append data to an existing
797 file or 'w' to create a new file overwriting an existing one. `mode'
798 defaults to 'r'.
799 If `fileobj' is given, it is used for reading or writing data. If it
800 can be determined, `mode' is overridden by `fileobj's mode.
801 `fileobj' is not closed, when TarFile is closed.
802 """
803 self.name = name
804
805 if len(mode) > 1 or mode not in "raw":
806 raise ValueError, "mode must be 'r', 'a' or 'w'"
807 self._mode = mode
808 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
809
810 if not fileobj:
811 fileobj = file(self.name, self.mode)
812 self._extfileobj = False
813 else:
814 if self.name is None and hasattr(fileobj, "name"):
815 self.name = fileobj.name
816 if hasattr(fileobj, "mode"):
817 self.mode = fileobj.mode
818 self._extfileobj = True
819 self.fileobj = fileobj
820
821 # Init datastructures
822 self.closed = False
823 self.members = [] # list of members as TarInfo objects
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000824 self._loaded = False # flag if all members have been read
825 self.offset = 0L # current position in the archive file
826 self.inodes = {} # dictionary caching the inodes of
827 # archive members already added
828
829 if self._mode == "r":
830 self.firstmember = None
831 self.firstmember = self.next()
832
833 if self._mode == "a":
834 # Move to the end of the archive,
835 # before the first empty block.
836 self.firstmember = None
837 while True:
838 try:
839 tarinfo = self.next()
840 except ReadError:
841 self.fileobj.seek(0)
842 break
843 if tarinfo is None:
844 self.fileobj.seek(- BLOCKSIZE, 1)
845 break
846
847 if self._mode in "aw":
848 self._loaded = True
849
850 #--------------------------------------------------------------------------
851 # Below are the classmethods which act as alternate constructors to the
852 # TarFile class. The open() method is the only one that is needed for
853 # public use; it is the "super"-constructor and is able to select an
854 # adequate "sub"-constructor for a particular compression using the mapping
855 # from OPEN_METH.
856 #
857 # This concept allows one to subclass TarFile without losing the comfort of
858 # the super-constructor. A sub-constructor is registered and made available
859 # by adding it to the mapping in OPEN_METH.
860
861 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
862 """Open a tar archive for reading, writing or appending. Return
863 an appropriate TarFile class.
864
865 mode:
866 'r' open for reading with transparent compression
867 'r:' open for reading exclusively uncompressed
868 'r:gz' open for reading with gzip compression
869 'r:bz2' open for reading with bzip2 compression
870 'a' or 'a:' open for appending
871 'w' or 'w:' open for writing without compression
872 'w:gz' open for writing with gzip compression
873 'w:bz2' open for writing with bzip2 compression
874 'r|' open an uncompressed stream of tar blocks for reading
875 'r|gz' open a gzip compressed stream of tar blocks
876 'r|bz2' open a bzip2 compressed stream of tar blocks
877 'w|' open an uncompressed stream for writing
878 'w|gz' open a gzip compressed stream for writing
879 'w|bz2' open a bzip2 compressed stream for writing
880 """
881
882 if not name and not fileobj:
883 raise ValueError, "nothing to open"
884
885 if ":" in mode:
886 filemode, comptype = mode.split(":", 1)
887 filemode = filemode or "r"
888 comptype = comptype or "tar"
889
890 # Select the *open() function according to
891 # given compression.
892 if comptype in cls.OPEN_METH:
893 func = getattr(cls, cls.OPEN_METH[comptype])
894 else:
895 raise CompressionError, "unknown compression type %r" % comptype
896 return func(name, filemode, fileobj)
897
898 elif "|" in mode:
899 filemode, comptype = mode.split("|", 1)
900 filemode = filemode or "r"
901 comptype = comptype or "tar"
902
903 if filemode not in "rw":
904 raise ValueError, "mode must be 'r' or 'w'"
905
906 t = cls(name, filemode,
907 _Stream(name, filemode, comptype, fileobj, bufsize))
908 t._extfileobj = False
909 return t
910
911 elif mode == "r":
912 # Find out which *open() is appropriate for opening the file.
913 for comptype in cls.OPEN_METH:
914 func = getattr(cls, cls.OPEN_METH[comptype])
915 try:
916 return func(name, "r", fileobj)
917 except (ReadError, CompressionError):
918 continue
919 raise ReadError, "file could not be opened successfully"
920
921 elif mode in "aw":
922 return cls.taropen(name, mode, fileobj)
923
924 raise ValueError, "undiscernible mode"
925
926 open = classmethod(open)
927
928 def taropen(cls, name, mode="r", fileobj=None):
929 """Open uncompressed tar archive name for reading or writing.
930 """
931 if len(mode) > 1 or mode not in "raw":
932 raise ValueError, "mode must be 'r', 'a' or 'w'"
933 return cls(name, mode, fileobj)
934
935 taropen = classmethod(taropen)
936
937 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
938 """Open gzip compressed tar archive name for reading or writing.
939 Appending is not allowed.
940 """
941 if len(mode) > 1 or mode not in "rw":
942 raise ValueError, "mode must be 'r' or 'w'"
943
944 try:
945 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000946 gzip.GzipFile
947 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000948 raise CompressionError, "gzip module is not available"
949
950 pre, ext = os.path.splitext(name)
951 pre = os.path.basename(pre)
952 if ext == ".tgz":
953 ext = ".tar"
954 if ext == ".gz":
955 ext = ""
956 tarname = pre + ext
957
958 if fileobj is None:
959 fileobj = file(name, mode + "b")
960
961 if mode != "r":
962 name = tarname
963
964 try:
965 t = cls.taropen(tarname, mode,
966 gzip.GzipFile(name, mode, compresslevel, fileobj)
967 )
968 except IOError:
969 raise ReadError, "not a gzip file"
970 t._extfileobj = False
971 return t
972
973 gzopen = classmethod(gzopen)
974
975 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
976 """Open bzip2 compressed tar archive name for reading or writing.
977 Appending is not allowed.
978 """
979 if len(mode) > 1 or mode not in "rw":
980 raise ValueError, "mode must be 'r' or 'w'."
981
982 try:
983 import bz2
984 except ImportError:
985 raise CompressionError, "bz2 module is not available"
986
987 pre, ext = os.path.splitext(name)
988 pre = os.path.basename(pre)
989 if ext == ".tbz2":
990 ext = ".tar"
991 if ext == ".bz2":
992 ext = ""
993 tarname = pre + ext
994
995 if fileobj is not None:
996 raise ValueError, "no support for external file objects"
997
998 try:
999 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
1000 except IOError:
1001 raise ReadError, "not a bzip2 file"
1002 t._extfileobj = False
1003 return t
1004
1005 bz2open = classmethod(bz2open)
1006
    # All *open() methods are registered here.  The keys are the
    # compression types accepted by open(), the values are the names
    # of the classmethods which open() looks up with getattr().
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1013
1014 #--------------------------------------------------------------------------
1015 # The public methods which TarFile provides:
1016
1017 def close(self):
1018 """Close the TarFile. In write-mode, two finishing zero blocks are
1019 appended to the archive.
1020 """
1021 if self.closed:
1022 return
1023
1024 if self._mode in "aw":
1025 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1026 self.offset += (BLOCKSIZE * 2)
1027 # fill up the end with zero-blocks
1028 # (like option -b20 for tar does)
1029 blocks, remainder = divmod(self.offset, RECORDSIZE)
1030 if remainder > 0:
1031 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1032
1033 if not self._extfileobj:
1034 self.fileobj.close()
1035 self.closed = True
1036
def getmember(self, name):
    """Look up the archive member called `name' and return its TarInfo
       object. When a name occurs more than once in the archive, the
       last occurrence wins, as it is assumed to be the most up-to-date
       version. KeyError is raised if there is no such member.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001047
def getmembers(self):
    """Return a list of TarInfo objects, one per archive member, in the
       order in which the members appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires a scan of the whole archive.
    self._load()
    return self.members
1057
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001063
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that cannot be stored
       (anything that is not a regular file, directory, fifo, symbolic
       link, character device or block device).
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and inode in self.inodes \
           and arcname != self.inodes[inode]:
            # Is it a hardlink to an already
            # archived file? A file that is added a second time under
            # the very same name is NOT a link: it would point at
            # itself and carry no data.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    if stat.S_ISDIR(stmd):
        # For a directory, the size must be 0
        tarinfo.size = 0
    else:
        tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    # Symbolic owner names are optional: pwd/grp are falsy when the
    # modules are unavailable, and unknown ids simply stay unnamed.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1163
1164 def list(self, verbose=True):
1165 """Print a table of contents to sys.stdout. If `verbose' is False, only
1166 the names of the members are printed. If it is True, an `ls -l'-like
1167 output is produced.
1168 """
1169 self._check()
1170
1171 for tarinfo in self:
1172 if verbose:
1173 print filemode(tarinfo.mode),
1174 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1175 tarinfo.gname or tarinfo.gid),
1176 if tarinfo.ischr() or tarinfo.isblk():
1177 print "%10s" % ("%d,%d" \
1178 % (tarinfo.devmajor, tarinfo.devminor)),
1179 else:
1180 print "%10d" % tarinfo.size,
1181 print "%d-%02d-%02d %02d:%02d:%02d" \
1182 % time.localtime(tarinfo.mtime)[:6],
1183
1184 print tarinfo.name,
1185
1186 if verbose:
1187 if tarinfo.issym():
1188 print "->", tarinfo.linkname,
1189 if tarinfo.islnk():
1190 print "link to", tarinfo.linkname,
1191 print
1192
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of an unsupported type are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory. Only its entries are stored, never a member
    # for "." itself.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for entry in os.listdir("."):
                self.add(entry, os.path.join(arcname, entry))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # file() rather than open(): this module rebinds the name
        # open to TarFile.open.
        source = file(name, "rb")
        try:
            self.addfile(tarinfo, source)
        finally:
            # Never leak the descriptor, even if writing fails.
            source.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        # These members consist of a header only.
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for entry in os.listdir(name):
                self.add(os.path.join(name, entry),
                         os.path.join(arcname, entry))
1245
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive. When
       `fileobj' is given, tarinfo.size bytes are read from it and stored
       as the member's data. TarInfo objects can be created with
       gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       Note that `tarinfo' is modified in place: its name and linkname
       are normalized and, if necessary, shortened.
       ValueError is raised in posix mode when the size, the linkname or
       the name exceeds what the format can store.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" \
                             % (LENGTH_LINK))
        # GNU mode: store the full linkname in an extra member and
        # keep a shortened copy in the regular header.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last "/" that still lies within
            # the prefix field.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            cut = head.rfind("/") + 1
            name = tarinfo.name[cut:]
            # Drop the separating slash from the prefix.
            prefix = head[:cut][:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" \
                                 % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad the last block
    # with zeros.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001312
def extract(self, member, path=""):
    """Write one archive member to disk, below the current working
       directory or below `path' if that is given, using the member's
       full name. File information is restored as accurately as
       possible. `member' is either a member name or a TarInfo object.

       Depending on self.errorlevel, errors are either re-raised or
       only reported as debug messages.
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1345
def extractfile(self, member):
    """Return a read-only file-like object for an archive member, which
       may be given as a name or as a TarInfo object. Regular files and
       members of unknown type yield an object for their own data, links
       yield an object for the data of their target. For every other kind
       of member None is returned.
       The file-like object provides the following methods:
       read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Regular files, and members of unknown type, which are treated
    # like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # There is no data associated with directories, devices, etc.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Resolving a link requires going back in the archive, which a
    # non-seekable stream of tar blocks cannot do.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object. The
    # target is searched among the members preceding the link.
    return self.extractfile(self._getmember(tarinfo.linkname,
                                            tarinfo))
1384
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.

       Missing parent directories are created on a best-effort basis.
       After the member has been created, its owner is set and, unless
       it is a symbolic link, its permissions and modification time.
    """
    # Build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        # rwx for user, group and others (octal 777).
        ti.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        try:
            self._extract_member(ti, ti.name)
        except (EnvironmentError, ExtractError):
            # Best effort only: if the directory is really missing,
            # creating the member below reports the failure. Anything
            # else (e.g. KeyboardInterrupt) is no longer swallowed.
            self._dbg(2, "tarfile: could not create directory %r"
                         % upperdirs)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch on the member type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1437
1438 #--------------------------------------------------------------------------
1439 # Below are the different file methods. They are called via
1440 # _extract_member() when extract() is called. They can be replaced in a
1441 # subclass to implement other functionality.
1442
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. A directory that exists
       already is not an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only "already exists" is tolerated.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1451
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member `tarinfo'.

       Both the member's file object and the target file are closed
       even if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() rather than open(): this module rebinds the name
        # open to TarFile.open.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1460
def makeunknown(self, tarinfo, targetpath):
    """Extract a member whose type is unknown: its data is written to
       targetpath like that of a regular file and a debug message is
       issued.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
1468
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath. ExtractError is raised on
       platforms without os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1476
def makedev(self, tarinfo, targetpath):
    """Create the character or block device described by `tarinfo'
       at targetpath. ExtractError is raised on platforms that lack
       os.mknod or os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    mode = tarinfo.mode
    if tarinfo.isblk():
        mode = mode | stat.S_IFBLK
    else:
        mode = mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1491
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link targetpath. Where links are
       not available (platform limitation), a copy of the referenced
       file is made instead: first from the archive, then from the
       file system. IOError is raised if that fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if not tarinfo.issym():
            # The hard link target was prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(linkpath, targetpath)
    except AttributeError:
        # Raised when os.symlink/os.link (or the prepared link target)
        # is missing: fall back to copying.
        if tarinfo.issym():
            # A symlink target is relative to the link's directory.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1518
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo. This
       is attempted only when running as root; ExtractError is raised
       if the ownership cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then fall back to our own.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # Same lookup order for the user.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1546
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Nothing is done on platforms without os.chmod; ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001555
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to the mtime
       stored in tarinfo. ExtractError is raised if that fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1569
1570 #--------------------------------------------------------------------------
1571
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised if the very first block of the archive is
       not a valid header and ignore_zeros is not set.
    """
    self._check("ra")
    # A member that was read ahead is handed out first.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the block and keep looking for a header.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Test the LAST character of the name: a "regular file" whose
    # name ends with a slash is really a directory.
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        # some old tar programs don't know DIRTYPE
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    return tarinfo
1638
1639 #--------------------------------------------------------------------------
1640 # Below are some methods which are called for special typeflags in the
1641 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1642 # are registered in TYPE_METH below. You can register your own methods
1643 # with this mapping.
1644 # A registered method is called with a TarInfo object as only argument.
1645 #
1646 # During its execution the method MUST perform the following tasks:
1647 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1648 # if there is data to follow.
1649 # 2. set self.offset to the position where the next member's header will
1650 # begin.
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001651 # 3. append the tarinfo object to self.members, if it is supposed to appear
1652 # as a member of the TarFile object.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001653 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001654
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member.

       The data blocks following `tarinfo' contain the long name. It is
       applied to the member that comes next in the archive, and that
       member is returned. None is returned if the archive ends before
       that member.
    """
    # Collect the blocks and join them once instead of repeatedly
    # concatenating strings.
    chunks = []
    count = tarinfo.size
    while count > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header
    member = self.next()
    if member is None:
        # Truncated archive: there is nothing the long name belongs to.
        self._dbg(1, "tarfile: missing member after GNU long name")
        return None

    # The member logically starts at the longname header.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001677
1678 def proc_sparse(self, tarinfo):
1679 """Analyze a GNU sparse header plus extra headers.
1680 """
1681 buf = tarinfo.tobuf()
1682 sp = _ringbuffer()
1683 pos = 386
1684 lastpos = 0L
1685 realpos = 0L
1686 # There are 4 possible sparse structs in the
1687 # first header.
1688 for i in xrange(4):
1689 try:
1690 offset = int(buf[pos:pos + 12], 8)
1691 numbytes = int(buf[pos + 12:pos + 24], 8)
1692 except ValueError:
1693 break
1694 if offset > lastpos:
1695 sp.append(_hole(lastpos, offset - lastpos))
1696 sp.append(_data(offset, numbytes, realpos))
1697 realpos += numbytes
1698 lastpos = offset + numbytes
1699 pos += 24
1700
1701 isextended = ord(buf[482])
1702 origsize = int(buf[483:495], 8)
1703
1704 # If the isextended flag is given,
1705 # there are extra headers to process.
1706 while isextended == 1:
1707 buf = self.fileobj.read(BLOCKSIZE)
1708 self.offset += BLOCKSIZE
1709 pos = 0
1710 for i in xrange(21):
1711 try:
1712 offset = int(buf[pos:pos + 12], 8)
1713 numbytes = int(buf[pos + 12:pos + 24], 8)
1714 except ValueError:
1715 break
1716 if offset > lastpos:
1717 sp.append(_hole(lastpos, offset - lastpos))
1718 sp.append(_data(offset, numbytes, realpos))
1719 realpos += numbytes
1720 lastpos = offset + numbytes
1721 pos += 24
1722 isextended = ord(buf[504])
1723
1724 if lastpos < origsize:
1725 sp.append(_hole(lastpos, origsize - lastpos))
1726
1727 tarinfo.sparse = sp
1728
1729 tarinfo.offset_data = self.offset
1730 self.offset += self._block(tarinfo.size)
1731 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001732
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001733 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001734 return tarinfo
1735
# Dispatch table used by next(): maps a typeflag (a single character
# string) to the method that next() calls when it encounters a
# member with that typeflag.
TYPE_METH = dict([
    (GNUTYPE_LONGNAME, proc_gnulong),
    (GNUTYPE_LONGLINK, proc_gnulong),
    (GNUTYPE_SPARSE, proc_sparse),
])
1744
1745 #--------------------------------------------------------------------------
1746 # Little helper methods:
1747
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    full_blocks, tail = divmod(count, BLOCKSIZE)
    if tail:
        # A partially filled block still occupies a whole block.
        return (full_blocks + 1) * BLOCKSIZE
    return full_blocks * BLOCKSIZE
1756
def _getmember(self, name, tarinfo=None):
    """Search the member list backwards for a member called `name' and
       return it, or None if there is none. If tarinfo is given, only
       the members that precede it are searched.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        idx = len(members)
    else:
        idx = members.index(tarinfo)

    # Walk from bottom to top so that the last occurrence wins.
    while idx > 0:
        idx -= 1
        if name == members[idx].name:
            return members[idx]
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001772
def _load(self):
    """Scan the whole archive by calling next() until it reports the
       end, then flag the TarFile as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1782
def _check(self, mode=None):
    """Raise IOError if the TarFile has been closed or if, with `mode'
       given, the TarFile's own mode is not one of the characters in
       `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
1791
def __iter__(self):
    """Return an iterator over the members of the archive.
    """
    if not self._loaded:
        # Members are read from the archive on demand.
        return TarIter(self)
    return iter(self.members)
1799
def _create_gnulong(self, name, type):
    """Store a long name in the archive as a GNU longname/longlink
       member: an extended tar header of the given `type' whose size
       is the length of the name, followed by data blocks that hold
       the name as a null terminated string.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks, padding the last one with zeros
    self.fileobj.write(payload)
    full_blocks, tail = divmod(header.size, BLOCKSIZE)
    if tail > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - tail))
        full_blocks += 1
    self.offset += full_blocks * BLOCKSIZE
1824
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr, unless `level' is above the
       debug level of the TarFile.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1830# class TarFile
1831
class TarIter:
    """Iterator over the members of a TarFile that reads them
       on demand.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for the TarFile object `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """An iterator is its own iterator.
        """
        return self

    def next(self):
        """Fetch the next member with TarFile's next() method. Once it
           reports the end of the archive, mark the TarFile as _loaded
           and stop the iteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1856
# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size' bytes that
       starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True if `offset' lies within [self.offset, self.offset + size).
        start = self.offset
        return start <= offset and offset < start + self.size

class _data(_section):
    """A section of a sparse file that holds real data; `realpos' is
       stored alongside offset and size.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos

class _hole(_section):
    """A section of a sparse file that is a hole.
    """
    pass
1878
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       The search for a section starts at the position of the previous
       hit instead of at the beginning of the list.
    """
    def __init__(self):
        # Index of the item that was found last.
        self.idx = 0

    def find(self, offset):
        """Return the first item, starting from the last hit and
           wrapping around, that contains `offset'. Return None if
           there is no such item, in particular if the buffer is empty.
        """
        if not self:
            # Nothing to search; indexing would fail.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
1899
1900#---------------------------------------------
1901# zipfile compatible TarFile class
1902#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open the archive `file'. `compression' is TAR_PLAIN or
           TAR_GZIPPED; anything else raises ValueError.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attributes of a zipfile.ZipInfo.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [m.name for m in self.infolist()]
    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]
    def printdir(self):
        """Print a table of contents of the archive."""
        self.tarfile.list()
    def testzip(self):
        """Does nothing and returns None."""
        return
    def getinfo(self, name):
        """Return the member called `name'."""
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the data of the member called `name'."""
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive, optionally under
           the name `arcname'. `compress_type' is ignored.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add a member with the data `bytes'. Name and date are taken
           from the `filename' and `date_time' attributes of `zinfo',
           the size is the length of `bytes'.
        """
        import StringIO
        import calendar
        # addfile() needs a TarInfo object. A zipfile.ZipInfo is not
        # one, so create a fresh TarInfo unless the caller passed one.
        if isinstance(zinfo, TarInfo):
            tinfo = zinfo
        else:
            tinfo = TarInfo()
        tinfo.name = zinfo.filename
        # Trust the data, not zinfo.file_size, which a caller may have
        # left unset.
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO.StringIO(bytes))
    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
1948
1949#--------------------
1950# exported functions
1951#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened as a tar archive by this
       module, and False if opening it raises a TarError.
    """
    # NOTE: open is this module's open (TarFile.open, bound below),
    # not the builtin.
    try:
        open(name).close()
    except TarError:
        return False
    return True

open = TarFile.open