#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE         # length of records
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1        # maximum size of a file (11 octal digits)

# Type flags stored in a member's header.
REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special permission bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Permission bits for owner, group and other.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

       The result is everything in front of the first NUL.  Whatever
       follows the terminator is padding or stale data and does not
       belong to the value.  A buffer that contains no NUL at all is
       returned unchanged.
    """
    # Stripping NULs from the right end only would keep anything that
    # sits behind an embedded terminator, so cut at the first NUL.
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Return the checksum of a member's header block.

       buf is the string buffer holding the header (512 bytes long).
       The checksum is the plain sum of all byte values, where the
       eight bytes of the chksum field itself (offsets 148 to 155)
       are counted as if they were blanks.
    """
    # 256 == 8 * ord(" ") stands in for the chksum field.
    before = sum(map(ord, buf[:148]))
    after = sum(map(ord, buf[156:]))
    return 256 + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

       With length None everything up to the end of src is copied.
       With a length given, IOError is raised as soon as src delivers
       fewer bytes than requested.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    bufsize = 16 * 1024
    chunks_left, tail = divmod(length, bufsize)

    # First all complete chunks ...
    while chunks_left > 0:
        data = src.read(bufsize)
        if len(data) < bufsize:
            raise IOError("end of file reached")
        dst.write(data)
        chunks_left -= 1

    # ... then whatever is left over.
    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
175
# One row per character of the mode string built by filemode().  A row
# is a flat sequence "mask, char, mask, char, ..." that is tried from
# left to right; the first mask whose bits are all set in the mode
# supplies the character.  Masks that combine several bits therefore
# have to come before the single bits they contain, otherwise e.g.
# TUEXEC would always win over TUEXEC|TSUID and "s" could never appear.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))
192
def filemode(mode):
    """Return the string form of a file's mode, like -rwxrwxrwx.

       Every row of filemode_table contributes exactly one character:
       the one belonging to the first mask of the row that is fully
       set in mode, or "-" if none of them is.
       Used by TarFile.list()
    """
    chars = []
    for row in filemode_table:
        char = "-"
        # Rows are flat: mask, char, mask, char, ...
        for i in range(0, len(row), 2):
            mask = row[i]
            if mode & mask == mask:
                char = row[i + 1]
                break
        chars.append(char)
    return "".join(chars)
210
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and use "/" as the only separator."""
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Root of all exceptions defined by this module."""

class ExtractError(TarError):
    """Raised for general errors during extraction."""

class ReadError(TarError):
    """Raised for tar archives that cannot be read."""

class CompressionError(TarError):
    """Raised when a compression method is not available."""

class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
259class _Stream:
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
266
267 _Stream is intended to be used only internally.
268 """
269
270 def __init__(self, name, mode, type, fileobj, bufsize):
271 """Construct a _Stream object.
272 """
273 self._extfileobj = True
274 if fileobj is None:
275 fileobj = _LowLevelFile(name, mode)
276 self._extfileobj = False
277
278 self.name = name or ""
279 self.mode = mode
280 self.type = type
281 self.fileobj = fileobj
282 self.bufsize = bufsize
283 self.buf = ""
284 self.pos = 0L
285 self.closed = False
286
287 if type == "gz":
288 try:
289 import zlib
290 except ImportError:
291 raise CompressionError, "zlib module is not available"
292 self.zlib = zlib
293 self.crc = zlib.crc32("")
294 if mode == "r":
295 self._init_read_gz()
296 else:
297 self._init_write_gz()
298
299 if type == "bz2":
300 try:
301 import bz2
302 except ImportError:
303 raise CompressionError, "bz2 module is not available"
304 if mode == "r":
305 self.dbuf = ""
306 self.cmp = bz2.BZ2Decompressor()
307 else:
308 self.cmp = bz2.BZ2Compressor()
309
310 def __del__(self):
311 if not self.closed:
312 self.close()
313
314 def _init_write_gz(self):
315 """Initialize for writing with gzip compression.
316 """
317 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
318 -self.zlib.MAX_WBITS,
319 self.zlib.DEF_MEM_LEVEL,
320 0)
321 timestamp = struct.pack("<L", long(time.time()))
322 self.__write("\037\213\010\010%s\002\377" % timestamp)
323 if self.name.endswith(".gz"):
324 self.name = self.name[:-3]
325 self.__write(self.name + NUL)
326
327 def write(self, s):
328 """Write string s to the stream.
329 """
330 if self.type == "gz":
331 self.crc = self.zlib.crc32(s, self.crc)
332 self.pos += len(s)
333 if self.type != "tar":
334 s = self.cmp.compress(s)
335 self.__write(s)
336
337 def __write(self, s):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
340 """
341 self.buf += s
342 while len(self.buf) > self.bufsize:
343 self.fileobj.write(self.buf[:self.bufsize])
344 self.buf = self.buf[self.bufsize:]
345
346 def close(self):
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
349 """
350 if self.closed:
351 return
352
Martin v. Löwisc234a522004-08-22 21:28:33 +0000353 if self.mode == "w" and self.type != "tar":
354 self.buf += self.cmp.flush()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000355 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000356 self.fileobj.write(self.buf)
357 self.buf = ""
358 if self.type == "gz":
359 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000360 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000361
362 if not self._extfileobj:
363 self.fileobj.close()
364
365 self.closed = True
366
367 def _init_read_gz(self):
368 """Initialize for reading a gzip compressed fileobj.
369 """
370 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
371 self.dbuf = ""
372
373 # taken from gzip.GzipFile with some alterations
374 if self.__read(2) != "\037\213":
375 raise ReadError, "not a gzip file"
376 if self.__read(1) != "\010":
377 raise CompressionError, "unsupported compression method"
378
379 flag = ord(self.__read(1))
380 self.__read(6)
381
382 if flag & 4:
383 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
384 self.read(xlen)
385 if flag & 8:
386 while True:
387 s = self.__read(1)
388 if not s or s == NUL:
389 break
390 if flag & 16:
391 while True:
392 s = self.__read(1)
393 if not s or s == NUL:
394 break
395 if flag & 2:
396 self.__read(2)
397
398 def tell(self):
399 """Return the stream's file pointer position.
400 """
401 return self.pos
402
403 def seek(self, pos=0):
404 """Set the stream's file pointer to pos. Negative seeking
405 is forbidden.
406 """
407 if pos - self.pos >= 0:
408 blocks, remainder = divmod(pos - self.pos, self.bufsize)
409 for i in xrange(blocks):
410 self.read(self.bufsize)
411 self.read(remainder)
412 else:
413 raise StreamError, "seeking backwards is not allowed"
414 return self.pos
415
416 def read(self, size=None):
417 """Return the next size number of bytes from the stream.
418 If size is not defined, return all bytes of the stream
419 up to EOF.
420 """
421 if size is None:
422 t = []
423 while True:
424 buf = self._read(self.bufsize)
425 if not buf:
426 break
427 t.append(buf)
428 buf = "".join(t)
429 else:
430 buf = self._read(size)
431 self.pos += len(buf)
432 return buf
433
434 def _read(self, size):
435 """Return size bytes from the stream.
436 """
437 if self.type == "tar":
438 return self.__read(size)
439
440 c = len(self.dbuf)
441 t = [self.dbuf]
442 while c < size:
443 buf = self.__read(self.bufsize)
444 if not buf:
445 break
446 buf = self.cmp.decompress(buf)
447 t.append(buf)
448 c += len(buf)
449 t = "".join(t)
450 self.dbuf = t[size:]
451 return t[:size]
452
453 def __read(self, size):
454 """Return size bytes from stream. If internal buffer is empty,
455 read another block from the stream.
456 """
457 c = len(self.buf)
458 t = [self.buf]
459 while c < size:
460 buf = self.fileobj.read(self.bufsize)
461 if not buf:
462 break
463 t.append(buf)
464 c += len(buf)
465 t = "".join(t)
466 self.buf = t[size:]
467 return t[:size]
468# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up reading of member tarinfo from tarfile.fileobj.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        # read() is bound per instance to the matching implementation.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Carriage returns directly in front of the newline are
           removed.  An empty string is returned at the end of the
           member.
        """
        unlimited = size < 0

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            if not unlimited and size < nl:
                # The limit lies inside the buffered line: hand out the
                # first size characters only and keep the rest, so that
                # no data gets lost and no newline is made up.
                line = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                return line
        else:
            if not unlimited:
                budget = size - len(self.linebuffer)
            while nl < 0 and (unlimited or budget > 0):
                if unlimited:
                    buf = self.read(100)
                else:
                    buf = self.read(min(budget, 100))
                if not buf:
                    break
                self.linebuffer += buf
                if not unlimited:
                    budget -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                # No newline in reach: return what has been collected.
                s = self.linebuffer
                self.linebuffer = ""
                return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.  Returns at most size
           bytes, or everything up to the end of the member if size
           is None.  Raises ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        # The archive's file object is shared, so position it anew
        # for every read.
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.  Raises ValueError if
           the object is closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.  Returns at most
           size bytes that lie within the section at the current
           position, or "" if there is no such section.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data sections are stored in the archive ...
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # ... everything else is delivered as NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.  whence is 0 (from the
           start), 1 (relative) or 2 (from the end).  The resulting
           position is clipped to the range 0..size.  Buffered line
           data is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        elif whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        elif whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
613
614#------------------
615# Exported Classes
616#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name        # member name (dirnames must end with '/')
        self.mode = 0o666       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       #-
        self.devminor = 0       #-for use with CHRTYPE and BLKTYPE
        self.prefix = ""        # prefix to filename or holding information
                                # about sparse files

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,
                                   self.name, id(self))

    def _octal(field):
        """Return the value of an octal number field of a header.

           Only the part in front of the first NUL is converted, so
           that the terminator never reaches int() (which may reject
           NUL bytes).  ValueError is raised for fields that are blank
           or do not hold an octal number.
        """
        return int(field.split(NUL, 1)[0], 8)

    _octal = staticmethod(_octal)

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.
           Raises ValueError if one of the mandatory number fields
           cannot be converted.
        """
        tarinfo = cls()
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = cls._octal(buf[100:108])
        tarinfo.uid = cls._octal(buf[108:116])
        tarinfo.gid = cls._octal(buf[116:124])

        # There are two possible codings for the size field we
        # have to discriminate, see comment in tobuf() below.
        if buf[124] != "\200":
            tarinfo.size = long(cls._octal(buf[124:136]))
        else:
            # 0200 marker followed by an 88-bit big endian number
            tarinfo.size = long(0)
            for i in range(11):
                tarinfo.size <<= 8
                tarinfo.size += ord(buf[125 + i])

        tarinfo.mtime = long(cls._octal(buf[136:148]))
        tarinfo.chksum = cls._octal(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        try:
            tarinfo.devmajor = cls._octal(buf[329:337])
            tarinfo.devminor = cls._octal(buf[337:345])
        except ValueError:
            # Unusable device numbers: reset both of them.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix),
                                                 tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.
           The block is also stored in self.buf.
        """
        # Prefer the size to be encoded as 11 octal ascii digits
        # which is the most portable. If the size exceeds this
        # limit (>= 8 GB), encode it as an 88-bit value which is
        # a GNU tar feature.
        if self.size <= MAXSIZE_MEMBER:
            size = "%011o" % self.size
        else:
            s = self.size
            size = ""
            for i in range(11):
                size = chr(s & 0xff) + size
                s >>= 8
            size = "\200" + size

        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (self.name, 100),
                ("%07o" % (self.mode & 0o7777), 8),
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                (size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),        # chksum, filled in below
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # Cut values that are too long, pad short ones with NULs.
            l = len(value)
            parts.append(value[:fieldsize] + (fieldsize - l) * NUL)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # Six octal digits and a NUL; the eighth byte of the field
        # keeps the blank it was filled with.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
765
766class TarFile(object):
767 """The TarFile Class provides an interface to tar archives.
768 """
769
770 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
771
772 dereference = False # If true, add content of linked file to the
773 # tar file, else the link.
774
775 ignore_zeros = False # If true, skips empty or invalid blocks and
776 # continues processing.
777
778 errorlevel = 0 # If 0, fatal errors only appear in debug
779 # messages (if debug >= 0). If > 0, errors
780 # are passed to the caller as exceptions.
781
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000782 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000783 # archives (no GNU extensions!)
784
785 fileobject = ExFileObject
786
787 def __init__(self, name=None, mode="r", fileobj=None):
788 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
789 read from an existing archive, 'a' to append data to an existing
790 file or 'w' to create a new file overwriting an existing one. `mode'
791 defaults to 'r'.
792 If `fileobj' is given, it is used for reading or writing data. If it
793 can be determined, `mode' is overridden by `fileobj's mode.
794 `fileobj' is not closed, when TarFile is closed.
795 """
796 self.name = name
797
798 if len(mode) > 1 or mode not in "raw":
799 raise ValueError, "mode must be 'r', 'a' or 'w'"
800 self._mode = mode
801 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
802
803 if not fileobj:
804 fileobj = file(self.name, self.mode)
805 self._extfileobj = False
806 else:
807 if self.name is None and hasattr(fileobj, "name"):
808 self.name = fileobj.name
809 if hasattr(fileobj, "mode"):
810 self.mode = fileobj.mode
811 self._extfileobj = True
812 self.fileobj = fileobj
813
814 # Init datastructures
815 self.closed = False
816 self.members = [] # list of members as TarInfo objects
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000817 self._loaded = False # flag if all members have been read
818 self.offset = 0L # current position in the archive file
819 self.inodes = {} # dictionary caching the inodes of
820 # archive members already added
821
822 if self._mode == "r":
823 self.firstmember = None
824 self.firstmember = self.next()
825
826 if self._mode == "a":
827 # Move to the end of the archive,
828 # before the first empty block.
829 self.firstmember = None
830 while True:
831 try:
832 tarinfo = self.next()
833 except ReadError:
834 self.fileobj.seek(0)
835 break
836 if tarinfo is None:
837 self.fileobj.seek(- BLOCKSIZE, 1)
838 break
839
840 if self._mode in "aw":
841 self._loaded = True
842
843 #--------------------------------------------------------------------------
844 # Below are the classmethods which act as alternate constructors to the
845 # TarFile class. The open() method is the only one that is needed for
846 # public use; it is the "super"-constructor and is able to select an
847 # adequate "sub"-constructor for a particular compression using the mapping
848 # from OPEN_METH.
849 #
850 # This concept allows one to subclass TarFile without losing the comfort of
851 # the super-constructor. A sub-constructor is registered and made available
852 # by adding it to the mapping in OPEN_METH.
853
854 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
855 """Open a tar archive for reading, writing or appending. Return
856 an appropriate TarFile class.
857
858 mode:
859 'r' open for reading with transparent compression
860 'r:' open for reading exclusively uncompressed
861 'r:gz' open for reading with gzip compression
862 'r:bz2' open for reading with bzip2 compression
863 'a' or 'a:' open for appending
864 'w' or 'w:' open for writing without compression
865 'w:gz' open for writing with gzip compression
866 'w:bz2' open for writing with bzip2 compression
867 'r|' open an uncompressed stream of tar blocks for reading
868 'r|gz' open a gzip compressed stream of tar blocks
869 'r|bz2' open a bzip2 compressed stream of tar blocks
870 'w|' open an uncompressed stream for writing
871 'w|gz' open a gzip compressed stream for writing
872 'w|bz2' open a bzip2 compressed stream for writing
873 """
874
875 if not name and not fileobj:
876 raise ValueError, "nothing to open"
877
878 if ":" in mode:
879 filemode, comptype = mode.split(":", 1)
880 filemode = filemode or "r"
881 comptype = comptype or "tar"
882
883 # Select the *open() function according to
884 # given compression.
885 if comptype in cls.OPEN_METH:
886 func = getattr(cls, cls.OPEN_METH[comptype])
887 else:
888 raise CompressionError, "unknown compression type %r" % comptype
889 return func(name, filemode, fileobj)
890
891 elif "|" in mode:
892 filemode, comptype = mode.split("|", 1)
893 filemode = filemode or "r"
894 comptype = comptype or "tar"
895
896 if filemode not in "rw":
897 raise ValueError, "mode must be 'r' or 'w'"
898
899 t = cls(name, filemode,
900 _Stream(name, filemode, comptype, fileobj, bufsize))
901 t._extfileobj = False
902 return t
903
904 elif mode == "r":
905 # Find out which *open() is appropriate for opening the file.
906 for comptype in cls.OPEN_METH:
907 func = getattr(cls, cls.OPEN_METH[comptype])
908 try:
909 return func(name, "r", fileobj)
910 except (ReadError, CompressionError):
911 continue
912 raise ReadError, "file could not be opened successfully"
913
914 elif mode in "aw":
915 return cls.taropen(name, mode, fileobj)
916
917 raise ValueError, "undiscernible mode"
918
919 open = classmethod(open)
920
921 def taropen(cls, name, mode="r", fileobj=None):
922 """Open uncompressed tar archive name for reading or writing.
923 """
924 if len(mode) > 1 or mode not in "raw":
925 raise ValueError, "mode must be 'r', 'a' or 'w'"
926 return cls(name, mode, fileobj)
927
928 taropen = classmethod(taropen)
929
930 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
931 """Open gzip compressed tar archive name for reading or writing.
932 Appending is not allowed.
933 """
934 if len(mode) > 1 or mode not in "rw":
935 raise ValueError, "mode must be 'r' or 'w'"
936
937 try:
938 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000939 gzip.GzipFile
940 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000941 raise CompressionError, "gzip module is not available"
942
943 pre, ext = os.path.splitext(name)
944 pre = os.path.basename(pre)
945 if ext == ".tgz":
946 ext = ".tar"
947 if ext == ".gz":
948 ext = ""
949 tarname = pre + ext
950
951 if fileobj is None:
952 fileobj = file(name, mode + "b")
953
954 if mode != "r":
955 name = tarname
956
957 try:
958 t = cls.taropen(tarname, mode,
959 gzip.GzipFile(name, mode, compresslevel, fileobj)
960 )
961 except IOError:
962 raise ReadError, "not a gzip file"
963 t._extfileobj = False
964 return t
965
966 gzopen = classmethod(gzopen)
967
968 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
969 """Open bzip2 compressed tar archive name for reading or writing.
970 Appending is not allowed.
971 """
972 if len(mode) > 1 or mode not in "rw":
973 raise ValueError, "mode must be 'r' or 'w'."
974
975 try:
976 import bz2
977 except ImportError:
978 raise CompressionError, "bz2 module is not available"
979
980 pre, ext = os.path.splitext(name)
981 pre = os.path.basename(pre)
982 if ext == ".tbz2":
983 ext = ".tar"
984 if ext == ".bz2":
985 ext = ""
986 tarname = pre + ext
987
988 if fileobj is not None:
989 raise ValueError, "no support for external file objects"
990
991 try:
992 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
993 except IOError:
994 raise ReadError, "not a bzip2 file"
995 t._extfileobj = False
996 return t
997
998 bz2open = classmethod(bz2open)
999
1000 # All *open() methods are registered here.
1001 OPEN_METH = {
1002 "tar": "taropen", # uncompressed tar
1003 "gz": "gzopen", # gzip compressed tar
1004 "bz2": "bz2open" # bzip2 compressed tar
1005 }
1006
1007 #--------------------------------------------------------------------------
1008 # The public methods which TarFile provides:
1009
1010 def close(self):
1011 """Close the TarFile. In write-mode, two finishing zero blocks are
1012 appended to the archive.
1013 """
1014 if self.closed:
1015 return
1016
1017 if self._mode in "aw":
1018 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1019 self.offset += (BLOCKSIZE * 2)
1020 # fill up the end with zero-blocks
1021 # (like option -b20 for tar does)
1022 blocks, remainder = divmod(self.offset, RECORDSIZE)
1023 if remainder > 0:
1024 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1025
1026 if not self._extfileobj:
1027 self.fileobj.close()
1028 self.closed = True
1029
def getmember(self, name):
    """Return the TarInfo object for member `name'.

    The lookup is delegated to _getmember(); when it yields nothing a
    KeyError is raised.  If a member occurs more than once in the
    archive, its last occurrence is assumed to be the most up-to-date
    version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001040
def getmembers(self):
    """Return the archive members as a list of TarInfo objects, in the
    same order as they appear in the archive.
    """
    self._check()
    # A complete member list requires one full scan of the archive;
    # do it now unless it has already happened.
    already_scanned = self._loaded
    if not already_scanned:
        self._load()
    return self.members
1050
def getnames(self):
    """Return the names of all archive members as a list, ordered like
    the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001056
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
    object `fileobj' (using os.fstat on its file descriptor).

    The returned TarInfo may be modified before it is passed to
    addfile().  If given, `arcname' specifies an alternative name for
    the file in the archive.  None is returned for file types that
    cannot be represented (anything that is not a regular file,
    directory, fifo, symbolic link, character or block device).
    """
    self._check("aw")

    # When fileobj is given, replace name by fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Build the name of the member in the archive: normalise it,
    # drop a drive prefix and strip leading slashes so that absolute
    # paths become relative ones.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    tarinfo = TarInfo()

    # Use os.fstat for file objects; otherwise os.lstat or os.stat,
    # depending on platform and on whether symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if inode in self.inodes and not self.dereference \
           and arcname != self.inodes[inode]:
            # Hard link to a file that was already archived under a
            # DIFFERENT name.  Without the name comparison, adding
            # the same file twice would store a link that points at
            # itself.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is remembered only if it is valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    if stat.S_ISDIR(stmd):
        # For a directory, the size must be 0
        tarinfo.size = 0
    else:
        tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    # Symbolic owner names are optional: they need the pwd/grp
    # modules and an entry for the numeric id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1156
def list(self, verbose=True):
    """Print a table of contents to sys.stdout.

    With `verbose' False only the member names are printed, one per
    line.  With `verbose' True each line is an `ls -l'-like record:
    mode, owner/group, size (or major,minor for devices), modification
    time, name and, for links, the link target.
    """
    self._check()

    for tarinfo in self:
        fields = []
        if verbose:
            fields.append("%s" % (filemode(tarinfo.mode),))
            fields.append("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                     tarinfo.gname or tarinfo.gid))
            if tarinfo.ischr() or tarinfo.isblk():
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                fields.append("%10s" % device)
            else:
                fields.append("%10d" % tarinfo.size)
            fields.append("%d-%02d-%02d %02d:%02d:%02d"
                          % time.localtime(tarinfo.mtime)[:6])

        fields.append("%s" % (tarinfo.name,))

        if verbose:
            if tarinfo.issym():
                fields.append("-> %s" % (tarinfo.linkname,))
            if tarinfo.islnk():
                fields.append("link to %s" % (tarinfo.linkname,))

        # One space-separated line per member.
        print(" ".join(fields))
1185
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive.

    `name' may be any type of file (directory, fifo, symbolic link,
    etc.).  If given, `arcname' specifies an alternative name for the
    file in the archive.  Directories are added recursively by
    default; pass `recursive' as False to add only the directory
    entry.  The archive file itself and unsupported file types are
    skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current working
    # directory.  Only its entries are added, never "." itself.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the handle even when writing the member fails.
            f.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        # These member types carry no data blocks.
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))
1238
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

    If `fileobj' is given, tarinfo.size bytes are read from it and
    written after the header, padded to a full block.  TarInfo objects
    can be created with gettarinfo().  On Windows platforms `fileobj'
    should always be opened with mode 'rb' to avoid irritation about
    the file size.

    ValueError is raised in posix mode when the size, the link name or
    the member name cannot be stored in a plain header.  Note that
    `tarinfo' itself is modified (normalised/shortened names).
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # GNU mode: emit the full link name as a separate member and
        # keep a shortened copy in the regular header.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split at the last "/" within the first LENGTH_PREFIX + 1
            # characters; the left part (without the slash) becomes
            # the prefix field, the rest stays in the name field.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            split = head.rfind("/") + 1
            name = tarinfo.name[split:]
            prefix = head[:split][:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and fill up the last block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        padding = -tarinfo.size % BLOCKSIZE
        if padding > 0:
            self.fileobj.write(NUL * padding)
        self.offset += tarinfo.size + padding

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001305
def extract(self, member, path=""):
    """Extract a member from the archive to the current working
    directory, using its full name.

    `member' may be a filename or a TarInfo object.  A different
    target directory can be given with `path'.  File information is
    restored as accurately as possible.  Depending on `errorlevel',
    EnvironmentError (level > 0) and ExtractError (level > 1) are
    either re-raised or only reported through _dbg().

    NOTE(review): the member name is joined to `path' unchecked, so a
    name containing ".." or an absolute name can end up outside
    `path' -- confirm callers only extract trusted archives.
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        member = self.getmember(member)
    tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1338
def extractfile(self, member):
    """Extract a member from the archive as a file object.

    `member' may be a filename or a TarInfo object.  For a regular
    file (or a member of unknown type) a file-like object is returned.
    For a link, the file-like object of the link's target is returned.
    For anything else (directory, device, ...) None is returned.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files carry data; members of unknown type are treated
    # as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # No data associated with the member (directory, chrdev,
        # blkdev, etc.).
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.  The
    # target is searched among the members preceding the link.
    return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
1377
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object `tarinfo' to a physical file called
    `targetpath'.

    Missing parent directories are created first (best effort), then
    the member is created through the make*() method matching its
    type, and finally owner, mode and modification time are applied
    (mode and time are skipped for symbolic links).
    """
    # Build the destination pathname, replacing forward slashes by
    # platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        ti.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # rwxrwxrwx
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        try:
            self._extract_member(ti, ti.name)
        except (EnvironmentError, ExtractError):
            # Best effort only: if the directory really could not be
            # created, creating the member below reports the error.
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1430
1431 #--------------------------------------------------------------------------
1432 # Below are the different file methods. They are called via
1433 # _extract_member() when extract() is called. They can be replaced in a
1434 # subclass to implement other functionality.
1435
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.  An already existing
    directory is not an error; any other failure is re-raised.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1444
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath by copying the member's data
    (obtained through extractfile()) into it.

    Both the source file object and the target file are closed even
    when opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1453
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type at
    targetpath.  The member is written like a regular file and a
    level-1 debug message reports the unknown type.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
1461
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.  ExtractError is raised when the
    platform's os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1469
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
    ExtractError is raised when os.mknod() or os.makedev() is missing.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)

    os.mknod(targetpath, tarinfo.mode | devtype, device)
1484
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath.

    If the link cannot be created because of a platform limitation
    (signalled by AttributeError), a copy of the referenced file is
    made instead: first by extracting the referenced member, then by
    copying the file from the file system.  IOError is raised when
    that copy fails as well.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            # The hard link target was prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        pass

    # Fallback.  A symlink target is relative to the directory of the
    # link itself.
    if tarinfo.issym():
        source = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                       source))

    try:
        self._extract_member(self.getmember(source), targetpath)
    except (EnvironmentError, KeyError):
        source = os.path.normpath(source)
        try:
            shutil.copy2(source, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1511
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

    Nothing happens unless the pwd module is available and the
    effective user id is 0.  Group and user are resolved by name
    first, then by numeric id, and finally fall back to the ids of the
    current process.  ExtractError is raised if the change fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1539
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
    Does nothing on platforms without os.chmod(); ExtractError is
    raised if the change fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001548
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.
    Access and modification time are both set to tarinfo.mtime;
    ExtractError is raised if the change fails.
    """
    if not hasattr(os, 'utime'):
        return
    if sys.platform == "win32" and tarinfo.isdir():
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1562
1563 #--------------------------------------------------------------------------
1564
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading.  Return None if there is no more
    available.

    ReadError is raised when the very first block cannot be parsed
    (and `ignore_zeros' is not set).  With `ignore_zeros' set, empty
    and invalid blocks are skipped instead of ending the archive.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and store directories
    # as regular files whose name ends with a slash.  The LAST
    # character has to be inspected here (name[-1:]).
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    return tarinfo
1631
1632 #--------------------------------------------------------------------------
1633 # Below are some methods which are called for special typeflags in the
1634 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1635 # are registered in TYPE_METH below. You can register your own methods
1636 # with this mapping.
1637 # A registered method is called with a TarInfo object as only argument.
1638 #
1639 # During its execution the method MUST perform the following tasks:
1640 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1641 # if there is data to follow.
1642 # 2. set self.offset to the position where the next member's header will
1643 # begin.
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001644 # 3. append the tarinfo object to self.members, if it is supposed to appear
1645 # as a member of the TarFile object.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001646 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001647
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname or longlink member.

    The data blocks following `tarinfo' contain the long name.  The
    header that comes after them is fetched with next() and returned
    with its name (GNUTYPE_LONGNAME) or linkname (GNUTYPE_LONGLINK)
    replaced by the long value.  None is returned if the archive ends
    before that header.
    """
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header
    member = self.next()
    if member is None:
        # Truncated archive: there is no member the long name could
        # belong to.
        return None

    # The member starts where its longname/longlink header started.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001670
def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

    The sparse map is stored as consecutive 24 byte entries (two 12
    byte octal numbers: offset and number of bytes).  It is turned
    into a _ringbuffer of _data and _hole sections and stored in
    tarinfo.sparse; tarinfo.size is replaced by the original file
    size read from the header.
    """
    header = tarinfo.tobuf()
    sections = _ringbuffer()
    cursor = 386
    prev_end = 0
    data_pos = 0
    # There are 4 possible sparse structs in the first header.
    for _ in range(4):
        try:
            start = int(header[cursor:cursor + 12], 8)
            length = int(header[cursor + 12:cursor + 24], 8)
        except ValueError:
            break
        if start > prev_end:
            sections.append(_hole(prev_end, start - prev_end))
        sections.append(_data(start, length, data_pos))
        data_pos += length
        prev_end = start + length
        cursor += 24

    isextended = ord(header[482])
    origsize = int(header[483:495], 8)

    # If the isextended flag is given, there are extra headers to
    # process, each holding up to 21 further sparse structs.
    while isextended == 1:
        header = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        cursor = 0
        for _ in range(21):
            try:
                start = int(header[cursor:cursor + 12], 8)
                length = int(header[cursor + 12:cursor + 24], 8)
            except ValueError:
                break
            if start > prev_end:
                sections.append(_hole(prev_end, start - prev_end))
            sections.append(_data(start, length, data_pos))
            data_pos += length
            prev_end = start + length
            cursor += 24
        isextended = ord(header[504])

    # A trailing hole reaches up to the original file size.
    if prev_end < origsize:
        sections.append(_hole(prev_end, origsize - prev_end))

    tarinfo.sparse = sections

    # tarinfo.size still is the stored (condensed) size here.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    self.members.append(tarinfo)
    return tarinfo
1728
# Dispatch table used by next().  Keys are typeflags (single character
# strings); values are the functions next() calls as
# func(self, tarinfo) when it encounters such a typeflag.
TYPE_METH = {
    # GNU long member name
    GNUTYPE_LONGNAME: proc_gnulong,
    # GNU long link name
    GNUTYPE_LONGLINK: proc_gnulong,
    # GNU sparse file
    GNUTYPE_SPARSE: proc_sparse,
}
1737
1738 #--------------------------------------------------------------------------
1739 # Little helper methods:
1740
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
    e.g. _block(834) => 1024.
    """
    # Number of bytes missing up to the next block boundary
    # (0 if count already is a multiple of BLOCKSIZE).
    padding = -count % BLOCKSIZE
    return count + padding
1749
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.

    If tarinfo is given, only the members located before it are
    searched.  None is returned when no member matches.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is not None:
        index = members.index(tarinfo)
    else:
        index = len(members)

    # Walk backwards so that the last occurrence wins.
    while index > 0:
        index -= 1
        candidate = members[index]
        if name == candidate.name:
            return candidate
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001765
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001766 def _load(self):
1767 """Read through the entire archive file and look for readable
1768 members.
1769 """
1770 while True:
1771 tarinfo = self.next()
1772 if tarinfo is None:
1773 break
1774 self._loaded = True
1775
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
    corresponds to TarFile's mode.  `mode' is a string of permitted
    mode characters; IOError is raised on violation.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
1784
def __iter__(self):
    """Provide an iterator object.  Once all members are loaded the
    member list is iterated directly; before that a TarIter reads the
    members one by one.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1792
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
    It consists of an extended tar header, with the length
    of the longname as size, followed by data blocks,
    which contain the longname as a null terminated string.
    """
    name += NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(name)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks, filling up the last one with NULs
    self.fileobj.write(name)
    padding = -header.size % BLOCKSIZE
    if padding > 0:
        self.fileobj.write(NUL * padding)
    self.offset += header.size + padding
1817
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.  The message is emitted
    only if `level' does not exceed self.debug.
    """
    if level > self.debug:
        return
    sys.stderr.write("%s\n" % (msg,))
1823# class TarFile
1824
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile whose members are to be iterated."""
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator object itself."""
        return self

    def next(self):
        """Return the next member obtained from TarFile.next().
        When no member is left, flag the TarFile as _loaded and
        raise StopIteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1849
1850# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole: a range of `size' bytes that
    starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size
    def __contains__(self, offset):
        # True if `offset' lies within [self.offset, self.offset + size).
        return 0 <= offset - self.offset < self.size
1859
class _data(_section):
    """Represent a data section in a sparse file.  In addition to
    offset and size it stores `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1866
class _hole(_section):
    """Represent a hole section in a sparse file.  A hole has an
    offset and a size like any other section.
    """
1871
class _ringbuffer(list):
    """Ringbuffer class which increases performance
    over a regular list.

    The search in find() starts at the position of the previous hit
    (self.idx) and wraps around at the end of the list.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item containing `offset', searching
        circularly from the last hit.  Return None if no item
        contains it (End of File) or if the buffer is empty.
        """
        count = len(self)
        if not count:
            # Nothing to search; indexing would raise IndexError.
            return None
        start = self.idx
        idx = start
        while True:
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
            idx += 1
            if idx == count:
                idx = 0
            if idx == start:
                # End of File
                return None
1892
1893#---------------------------------------------
1894# zipfile compatible TarFile class
1895#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.  All work is delegated to a TarFile stored in
       self.tarfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Pick the opener that matches the compression constant.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names used by zipfile's
            # ZipInfo objects.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        # Names of the members returned by infolist().
        return [member.name for member in self.infolist()]
    def infolist(self):
        # Only members whose type is one of REGULAR_TYPES are listed.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        # No integrity test is performed; always returns None.
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted for compatibility and ignored.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the ZipInfo-style attributes into TarInfo ones.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
1941
1942#--------------------
1943# exported functions
1944#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened with this module's open() and closed
       again right away; a TarError during that means False.  Other
       exceptions are not caught here.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1955
# Module-level shortcut for TarFile.open.  From this point on the name
# `open' inside this module refers to TarFile.open, not to the builtin.
open = TarFile.open