blob: 06d859087bd89711e4394dd5e2cc478de9bfbe7d [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                      # the null character; pads header fields
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records; TarFile.close() pads
                                # the archive to a multiple of this
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits);
                                # TarInfo.tobuf() switches to a binary
                                # size encoding above this value

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (type flag is a NUL byte)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files;
                                                # tested by TarInfo.isreg()

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
# filemode() tests these bits to build the "-rwxrwxrwx" string.
#---------------------------------------------------------
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (displayed as "t"/"T" by filemode())

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    The result is the part of `s' in front of the first NUL character;
    `s' is returned unchanged if it does not contain one.  Cutting at the
    first NUL (instead of only stripping trailing NULs) keeps stale bytes
    that follow the terminator inside a header field out of the result.
    """
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Return the checksum of the header block `buf'.

    `buf' is the 512 byte string holding a member's header.  The checksum
    is the plain sum of all byte values, where the eight bytes of the
    chksum field itself (buf[148:156]) are counted as blanks.
    """
    total = 8 * ord(" ")                          # the chksum field as blanks
    total += sum([ord(ch) for ch in buf[:148]])   # bytes before the field
    total += sum([ord(ch) for ch in buf[156:]])   # bytes after the field
    return total
150
def copyfileobj(src, dst, length=None):
    """Copy `length' bytes from the file object `src' to the file object
    `dst'.  If `length' is None, everything up to the end of `src' is
    copied.  IOError is raised if `src' ends before `length' bytes have
    been read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(nbytes):
        # Move exactly nbytes bytes; a short read means src ended early.
        data = src.read(nbytes)
        if len(data) < nbytes:
            raise IOError("end of file reached")
        dst.write(data)

    full_chunks, tail = divmod(length, BUFSIZE)
    done = 0
    while done < full_chunks:
        transfer(BUFSIZE)
        done += 1
    if tail != 0:
        transfer(tail)
175
# Every entry of filemode_table describes one character of the mode string
# as a flat sequence of (bits, character) pairs.  filemode() takes the
# character of the first pair whose bits are all set in the mode, so the
# pairs of an entry have to be ordered from the most specific bit pattern
# to the least specific one (e.g. "s" before "S" and "x").
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       One character is produced per entry of filemode_table; "-" is
       used if none of the entry's bit patterns is set in `mode'.
    """
    chars = []
    for entry in filemode_table:
        char = "-"
        for i in range(0, len(entry), 2):
            bits = entry[i]
            if mode & bits == bits:
                char = entry[i + 1]
                break
        chars.append(char)
    return "".join(chars)
210
def normpath(path):
    """Return `path' normalized by os.path.normpath(), with the local
    path separator replaced by "/" on platforms that use another one.
    """
    path = os.path.normpath(path)
    if os.sep != "/":
        path = path.replace(os.sep, "/")
    return path
215
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """General exception for errors during extraction."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for compression methods that are not available."""

class StreamError(TarError):
    """Exception for operations that stream-like TarFiles do not support."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.

       The file is accessed through a plain file descriptor and the
       os.read()/os.write() functions.
    """

    def __init__(self, name, mode):
        """Open the file `name'.  `mode' is "r" (read) or "w" (create or
        truncate, then write); any other value raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY only exists on platforms that distinguish between
        # text and binary files.
        flags |= getattr(os, "O_BINARY", 0)
        # Create new files as rw-rw-rw- (before the umask is applied).
        # Without an explicit value os.open() uses 0777, which marks
        # every new archive as executable.
        perms = (stat.S_IRUSR | stat.S_IWUSR |
                 stat.S_IRGRP | stat.S_IWGRP |
                 stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perms)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to `size' bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string `s' to the file descriptor."""
        os.write(self.fd, s)
258
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, type, fileobj, bufsize):
        """Construct a _Stream object.

        name     file to open if `fileobj' is None; when writing gzip
                 data it is also stored in the gzip header
        mode     "r" for reading, anything else is treated as writing
        type     "tar" (no compression), "gz" or "bz2"
        fileobj  object to read from or write to, or None
        bufsize  number of bytes moved from or to `fileobj' at once

        CompressionError is raised if the module needed for `type' is
        not available.
        """
        # The stream counts as closed until a file object is attached, so
        # that __del__() has nothing to do if _LowLevelFile() raises.
        self.closed = True
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        self.name = name or ""
        self.mode = mode
        self.type = type
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""       # raw data read from / waiting to be written
                            # to fileobj
        self.pos = 0        # position in the uncompressed data
        self.closed = False

        if type == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if type == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        if not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        # gzip header: magic, deflate method, FNAME flag, mtime, ...
        timestamp = struct.pack("<L", int(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        # ... followed by the NUL-terminated name without its ".gz" suffix.
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.type == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.type != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.

           When writing, pending data is flushed first.  The file object
           is only closed if it was opened by the _Stream itself.
        """
        if self.closed:
            return

        if self.mode == "w" and self.type != "tar":
            self.buf += self.cmp.flush()
        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.type == "gz":
                # gzip trailer: CRC and size (modulo 2**32) of the
                # uncompressed data
                self.fileobj.write(struct.pack("<l", self.crc))
                self.fileobj.write(struct.pack("<L", self.pos & 0xFFFFFFFF))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           The gzip header is consumed so that the next raw read returns
           deflate data.  ReadError is raised if the magic number does
           not match, CompressionError if the method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)          # mtime, extra flags and OS byte

        if flag & 4:
            # Extra field: two length bytes, then the data.  It is part
            # of the uncompressed header and therefore has to be skipped
            # with the raw __read(); read() would run it through the
            # decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)      # header checksum

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           The data between the current position and `pos' is read and
           thrown away.  Returns the new position; StreamError is raised
           if `pos' lies before the current position.
        """
        distance = pos - self.pos
        if distance < 0:
            raise StreamError("seeking backwards is not allowed")
        blocks, remainder = divmod(distance, self.bufsize)
        while blocks > 0:
            self.read(self.bufsize)
            blocks -= 1
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            chunks = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                chunks.append(buf)
            buf = "".join(chunks)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           For compressed streams, raw data is decompressed until `size'
           bytes are available; surplus data is kept in self.dbuf.
        """
        if self.type == "tar":
            return self.__read(size)

        have = len(self.dbuf)
        chunks = [self.dbuf]
        while have < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            chunks.append(buf)
            have += len(buf)
        data = "".join(chunks)
        self.dbuf = data[size:]
        return data[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        have = len(self.buf)
        chunks = [self.buf]
        while have < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            chunks.append(buf)
            have += len(buf)
        data = "".join(chunks)
        self.buf = data[size:]
        return data[:size]
# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object that gives read access to the data of a single
       archive member.  Objects of this class are returned by
       TarFile.extractfile().  Sparse members are supported.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up reading of the member `tarinfo' of `tarfile'."""
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.fileobj = tarfile.fileobj
        self.offset = tarinfo.offset_data   # start of the data in fileobj
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # data read ahead by readline()
        # read() is bound per instance to the matching implementation.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Read `size' bytes from the archive's file object.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read one line of about `size' characters; a negative `size'
           means the whole line.  Calls to readline() and read() must
           not be mixed (!).
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            # NOTE(review): if size < nl the line is cut at `size', the
            # character at that index is dropped below and "\n" is
            # appended anyway -- looks unintended, left unchanged here.
            nl = min(nl, size)
        else:
            # Read ahead in pieces of at most 100 characters until a
            # newline shows up, `size' is used up or the data ends.
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                chunk = self.read(min(size, 100))
                if not chunk:
                    break
                self.linebuffer += chunk
                size -= len(chunk)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                rest = self.linebuffer
                self.linebuffer = ""
                return rest
        line = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        # carriage returns in front of the newline are removed
        return line.rstrip("\r") + "\n"

    def readlines(self):
        """Return the remaining lines as a list.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def _readnormal(self, size=None):
        """read() implementation for members that are not sparse.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        remaining = self.size - self.pos
        if size is None:
            count = remaining
        else:
            count = min(size, remaining)
        self.pos += count
        return self.__read(count)

    def _readsparse(self, size=None):
        """read() implementation for sparse members.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        pieces = []
        while size > 0:
            piece = self._readsparsesection(size)
            if not piece:
                break
            pieces.append(piece)
            size -= len(piece)
        return "".join(pieces)

    def _readsparsesection(self, size):
        """Return at most `size' bytes from the section of the sparse
           member that contains the current position.
        """
        section = self.sparse.find(self.pos)
        if section is None:
            return ""

        start = self.pos
        toread = min(size, section.offset + section.size - start)
        self.pos = start + toread
        if not isinstance(section, _data):
            # not a data section: it reads as NUL bytes
            return NUL * toread
        realpos = section.realpos + start - section.offset
        self.fileobj.seek(self.offset + realpos)
        return self.__read(toread)

    def tell(self):
        """Return the current position inside the member.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Move to the position `pos'.  `whence' selects the reference
           point: 0 = start, 1 = current position, 2 = end.  The result
           is clipped to the range 0..size and the readline() buffer is
           discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        elif whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        elif whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Mark the file object as closed.
        """
        self.closed = True
#class ExFileObject
613
614#------------------
615# Exported Classes
616#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name            # member name (dirnames must end with '/')
        self.mode = (TUREAD | TUWRITE | TGREAD |
                     TGWRITE | TOREAD | TOWRITE)    # file permissions (0666)
        self.uid = 0                # user id
        self.gid = 0                # group id
        self.size = 0               # file size
        self.mtime = 0              # modification time
        self.chksum = 0             # header checksum
        self.type = REGTYPE         # member type
        self.linkname = ""          # link name
        self.uname = "user"         # user name
        self.gname = "group"        # group name
        self.devmajor = 0           #-
        self.devminor = 0           #-for use with CHRTYPE and BLKTYPE
        self.prefix = ""            # prefix to filename or holding information
                                    # about sparse files

        self.offset = 0             # the tar header starts here
        self.offset_data = 0        # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.name,
                                   id(self))

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           ValueError is raised if one of the numeric fields (other than
           the device numbers) does not hold an octal number.
        """
        tarinfo = cls()
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = int(buf[100:108], 8)
        tarinfo.uid = int(buf[108:116], 8)
        tarinfo.gid = int(buf[116:124], 8)

        # There are two possible codings for the size field we
        # have to discriminate, see comment in tobuf() below.
        if buf[124] != "\200":
            tarinfo.size = int(buf[124:136], 8)
        else:
            # a marker byte followed by 11 bytes holding the size as a
            # big-endian binary number
            size = 0
            for i in range(11):
                size = (size << 8) + ord(buf[125 + i])
            tarinfo.size = size

        tarinfo.mtime = int(buf[136:148], 8)
        tarinfo.chksum = int(buf[148:156], 8)
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        try:
            tarinfo.devmajor = int(buf[329:337], 8)
            tarinfo.devminor = int(buf[337:345], 8)
        except ValueError:
            # Unparsable device numbers: reset both of them.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        # (for sparse members the field holds other information and must
        # not be joined with the name)
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix),
                                                 tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.

           The block is also stored in self.buf.
        """
        # Prefer the size to be encoded as 11 octal ascii digits
        # which is the most portable. If the size exceeds this
        # limit (>= 8 GB), encode it as an 88-bit value which is
        # a GNU tar feature.
        if self.size <= MAXSIZE_MEMBER:
            size = "%011o" % self.size
        else:
            remaining = self.size
            size = ""
            for i in range(11):
                size = chr(remaining & 0xFF) + size
                remaining >>= 8
            size = "\200" + size

        # The following code was contributed by Detlef Lannert.
        fields = (
            (self.name, 100),
            ("%07o" % (self.mode & 0xFFF), 8),      # permission bits (07777)
            ("%07o" % self.uid, 8),
            ("%07o" % self.gid, 8),
            (size, 12),
            ("%011o" % self.mtime, 12),
            # NOTE(review): placeholder for the chksum field, written as
            # eight blanks here; the scraped source showed a single blank
            # because runs of whitespace were collapsed -- confirm against
            # the repository.
            ("        ", 8),
            (self.type, 1),
            (self.linkname, 100),
            (MAGIC, 6),
            (VERSION, 2),
            (self.uname, 32),
            (self.gname, 32),
            ("%07o" % self.devmajor, 8),
            ("%07o" % self.devminor, 8),
            (self.prefix, 155)
        )
        parts = []
        for value, fieldsize in fields:
            # cut the value to the field size or pad it with NULs
            padding = (fieldsize - len(value)) * NUL
            parts.append(value[:fieldsize] + padding)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # the checksum goes into the first seven bytes of its field
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    def isreg(self):
        """Return True if the member is one of the REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member is a directory."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member is a symbolic link."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member is a link (inside the tarfile)."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member is a character device."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member is a block device."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member is a fifo."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member is a GNU sparse file."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member is a character device, a block
           device or a fifo."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
765
766class TarFile(object):
767 """The TarFile Class provides an interface to tar archives.
768 """
769
770 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
771
772 dereference = False # If true, add content of linked file to the
773 # tar file, else the link.
774
775 ignore_zeros = False # If true, skips empty or invalid blocks and
776 # continues processing.
777
778 errorlevel = 0 # If 0, fatal errors only appear in debug
779 # messages (if debug >= 0). If > 0, errors
780 # are passed to the caller as exceptions.
781
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000782 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000783 # archives (no GNU extensions!)
784
785 fileobject = ExFileObject
786
787 def __init__(self, name=None, mode="r", fileobj=None):
788 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
789 read from an existing archive, 'a' to append data to an existing
790 file or 'w' to create a new file overwriting an existing one. `mode'
791 defaults to 'r'.
792 If `fileobj' is given, it is used for reading or writing data. If it
793 can be determined, `mode' is overridden by `fileobj's mode.
794 `fileobj' is not closed, when TarFile is closed.
795 """
796 self.name = name
797
798 if len(mode) > 1 or mode not in "raw":
799 raise ValueError, "mode must be 'r', 'a' or 'w'"
800 self._mode = mode
801 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
802
803 if not fileobj:
804 fileobj = file(self.name, self.mode)
805 self._extfileobj = False
806 else:
807 if self.name is None and hasattr(fileobj, "name"):
808 self.name = fileobj.name
809 if hasattr(fileobj, "mode"):
810 self.mode = fileobj.mode
811 self._extfileobj = True
812 self.fileobj = fileobj
813
814 # Init datastructures
815 self.closed = False
816 self.members = [] # list of members as TarInfo objects
817 self.membernames = [] # names of members
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000818 self._loaded = False # flag if all members have been read
819 self.offset = 0L # current position in the archive file
820 self.inodes = {} # dictionary caching the inodes of
821 # archive members already added
822
823 if self._mode == "r":
824 self.firstmember = None
825 self.firstmember = self.next()
826
827 if self._mode == "a":
828 # Move to the end of the archive,
829 # before the first empty block.
830 self.firstmember = None
831 while True:
832 try:
833 tarinfo = self.next()
834 except ReadError:
835 self.fileobj.seek(0)
836 break
837 if tarinfo is None:
838 self.fileobj.seek(- BLOCKSIZE, 1)
839 break
840
841 if self._mode in "aw":
842 self._loaded = True
843
844 #--------------------------------------------------------------------------
845 # Below are the classmethods which act as alternate constructors to the
846 # TarFile class. The open() method is the only one that is needed for
847 # public use; it is the "super"-constructor and is able to select an
848 # adequate "sub"-constructor for a particular compression using the mapping
849 # from OPEN_METH.
850 #
851 # This concept allows one to subclass TarFile without losing the comfort of
852 # the super-constructor. A sub-constructor is registered and made available
853 # by adding it to the mapping in OPEN_METH.
854
855 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
856 """Open a tar archive for reading, writing or appending. Return
857 an appropriate TarFile class.
858
859 mode:
860 'r' open for reading with transparent compression
861 'r:' open for reading exclusively uncompressed
862 'r:gz' open for reading with gzip compression
863 'r:bz2' open for reading with bzip2 compression
864 'a' or 'a:' open for appending
865 'w' or 'w:' open for writing without compression
866 'w:gz' open for writing with gzip compression
867 'w:bz2' open for writing with bzip2 compression
868 'r|' open an uncompressed stream of tar blocks for reading
869 'r|gz' open a gzip compressed stream of tar blocks
870 'r|bz2' open a bzip2 compressed stream of tar blocks
871 'w|' open an uncompressed stream for writing
872 'w|gz' open a gzip compressed stream for writing
873 'w|bz2' open a bzip2 compressed stream for writing
874 """
875
876 if not name and not fileobj:
877 raise ValueError, "nothing to open"
878
879 if ":" in mode:
880 filemode, comptype = mode.split(":", 1)
881 filemode = filemode or "r"
882 comptype = comptype or "tar"
883
884 # Select the *open() function according to
885 # given compression.
886 if comptype in cls.OPEN_METH:
887 func = getattr(cls, cls.OPEN_METH[comptype])
888 else:
889 raise CompressionError, "unknown compression type %r" % comptype
890 return func(name, filemode, fileobj)
891
892 elif "|" in mode:
893 filemode, comptype = mode.split("|", 1)
894 filemode = filemode or "r"
895 comptype = comptype or "tar"
896
897 if filemode not in "rw":
898 raise ValueError, "mode must be 'r' or 'w'"
899
900 t = cls(name, filemode,
901 _Stream(name, filemode, comptype, fileobj, bufsize))
902 t._extfileobj = False
903 return t
904
905 elif mode == "r":
906 # Find out which *open() is appropriate for opening the file.
907 for comptype in cls.OPEN_METH:
908 func = getattr(cls, cls.OPEN_METH[comptype])
909 try:
910 return func(name, "r", fileobj)
911 except (ReadError, CompressionError):
912 continue
913 raise ReadError, "file could not be opened successfully"
914
915 elif mode in "aw":
916 return cls.taropen(name, mode, fileobj)
917
918 raise ValueError, "undiscernible mode"
919
920 open = classmethod(open)
921
922 def taropen(cls, name, mode="r", fileobj=None):
923 """Open uncompressed tar archive name for reading or writing.
924 """
925 if len(mode) > 1 or mode not in "raw":
926 raise ValueError, "mode must be 'r', 'a' or 'w'"
927 return cls(name, mode, fileobj)
928
929 taropen = classmethod(taropen)
930
931 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
932 """Open gzip compressed tar archive name for reading or writing.
933 Appending is not allowed.
934 """
935 if len(mode) > 1 or mode not in "rw":
936 raise ValueError, "mode must be 'r' or 'w'"
937
938 try:
939 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000940 gzip.GzipFile
941 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000942 raise CompressionError, "gzip module is not available"
943
944 pre, ext = os.path.splitext(name)
945 pre = os.path.basename(pre)
946 if ext == ".tgz":
947 ext = ".tar"
948 if ext == ".gz":
949 ext = ""
950 tarname = pre + ext
951
952 if fileobj is None:
953 fileobj = file(name, mode + "b")
954
955 if mode != "r":
956 name = tarname
957
958 try:
959 t = cls.taropen(tarname, mode,
960 gzip.GzipFile(name, mode, compresslevel, fileobj)
961 )
962 except IOError:
963 raise ReadError, "not a gzip file"
964 t._extfileobj = False
965 return t
966
967 gzopen = classmethod(gzopen)
968
969 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
970 """Open bzip2 compressed tar archive name for reading or writing.
971 Appending is not allowed.
972 """
973 if len(mode) > 1 or mode not in "rw":
974 raise ValueError, "mode must be 'r' or 'w'."
975
976 try:
977 import bz2
978 except ImportError:
979 raise CompressionError, "bz2 module is not available"
980
981 pre, ext = os.path.splitext(name)
982 pre = os.path.basename(pre)
983 if ext == ".tbz2":
984 ext = ".tar"
985 if ext == ".bz2":
986 ext = ""
987 tarname = pre + ext
988
989 if fileobj is not None:
990 raise ValueError, "no support for external file objects"
991
992 try:
993 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
994 except IOError:
995 raise ReadError, "not a bzip2 file"
996 t._extfileobj = False
997 return t
998
999 bz2open = classmethod(bz2open)
1000
1001 # All *open() methods are registered here.
1002 OPEN_METH = {
1003 "tar": "taropen", # uncompressed tar
1004 "gz": "gzopen", # gzip compressed tar
1005 "bz2": "bz2open" # bzip2 compressed tar
1006 }
1007
1008 #--------------------------------------------------------------------------
1009 # The public methods which TarFile provides:
1010
1011 def close(self):
1012 """Close the TarFile. In write-mode, two finishing zero blocks are
1013 appended to the archive.
1014 """
1015 if self.closed:
1016 return
1017
1018 if self._mode in "aw":
1019 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1020 self.offset += (BLOCKSIZE * 2)
1021 # fill up the end with zero-blocks
1022 # (like option -b20 for tar does)
1023 blocks, remainder = divmod(self.offset, RECORDSIZE)
1024 if remainder > 0:
1025 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1026
1027 if not self._extfileobj:
1028 self.fileobj.close()
1029 self.closed = True
1030
1031 def getmember(self, name):
1032 """Return a TarInfo object for member `name'. If `name' can not be
1033 found in the archive, KeyError is raised. If a member occurs more
1034 than once in the archive, its last occurence is assumed to be the
1035 most up-to-date version.
1036 """
1037 self._check()
1038 if name not in self.membernames and not self._loaded:
1039 self._load()
1040 if name not in self.membernames:
1041 raise KeyError, "filename %r not found" % name
1042 return self._getmember(name)
1043
1044 def getmembers(self):
1045 """Return the members of the archive as a list of TarInfo objects. The
1046 list has the same order as the members in the archive.
1047 """
1048 self._check()
1049 if not self._loaded: # if we want to obtain a list of
1050 self._load() # all members, we first have to
1051 # scan the whole archive.
1052 return self.members
1053
1054 def getnames(self):
1055 """Return the members of the archive as a list of their names. It has
1056 the same order as the list returned by getmembers().
1057 """
1058 self._check()
1059 if not self._loaded:
1060 self._load()
1061 return self.membernames
1062
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object that describes an existing file.

    The file is given either by its path `name' or by the open file
    object `fileobj'; in the latter case os.fstat() is applied to its
    file descriptor and its `name' attribute replaces `name'. If given,
    `arcname' is the name the member gets in the archive. The returned
    TarInfo may be modified before it is handed to addfile(). None is
    returned for a file type that cannot be stored.
    """
    self._check("aw")

    # A file object always provides the real file name.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalize it, drop a drive prefix and
    # strip leading slashes so that absolute paths become relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    arcname = arcname.lstrip("/")

    # Stat the file. lstat() is preferred (where available) unless
    # symbolic links are to be followed.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    mode = statres.st_mode
    linkname = ""
    if stat.S_ISREG(mode):
        inode = (statres.st_ino, statres.st_dev)
        if self.dereference or inode not in self.inodes:
            kind = REGTYPE
            # Remember the inode only if it is valid; on win32 it is
            # always 0.
            if inode[0]:
                self.inodes[inode] = arcname
        else:
            # Same inode as an already archived file: store a hardlink
            # to that member instead of the data.
            kind = LNKTYPE
            linkname = self.inodes[inode]
    elif stat.S_ISDIR(mode):
        kind = DIRTYPE
        if not arcname.endswith("/"):
            arcname += "/"
    elif stat.S_ISFIFO(mode):
        kind = FIFOTYPE
    elif stat.S_ISLNK(mode):
        kind = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(mode):
        kind = CHRTYPE
    elif stat.S_ISBLK(mode):
        kind = BLKTYPE
    else:
        return None

    # Copy everything the stat result offers into the TarInfo object.
    tarinfo = TarInfo()
    tarinfo.name = arcname
    tarinfo.mode = mode
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    if stat.S_ISDIR(mode):
        # For a directory, the size must be 0.
        tarinfo.size = 0
    else:
        tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = linkname

    # Symbolic owner and group names are optional extras.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if kind in (CHRTYPE, BLKTYPE) \
       and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1162
1163 def list(self, verbose=True):
1164 """Print a table of contents to sys.stdout. If `verbose' is False, only
1165 the names of the members are printed. If it is True, an `ls -l'-like
1166 output is produced.
1167 """
1168 self._check()
1169
1170 for tarinfo in self:
1171 if verbose:
1172 print filemode(tarinfo.mode),
1173 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1174 tarinfo.gname or tarinfo.gid),
1175 if tarinfo.ischr() or tarinfo.isblk():
1176 print "%10s" % ("%d,%d" \
1177 % (tarinfo.devmajor, tarinfo.devminor)),
1178 else:
1179 print "%10d" % tarinfo.size,
1180 print "%d-%02d-%02d %02d:%02d:%02d" \
1181 % time.localtime(tarinfo.mtime)[:6],
1182
1183 print tarinfo.name,
1184
1185 if verbose:
1186 if tarinfo.issym():
1187 print "->", tarinfo.linkname,
1188 if tarinfo.islnk():
1189 print "link to", tarinfo.linkname,
1190 print
1191
1192 def add(self, name, arcname=None, recursive=True):
1193 """Add the file `name' to the archive. `name' may be any type of file
1194 (directory, fifo, symbolic link, etc.). If given, `arcname'
1195 specifies an alternative name for the file in the archive.
1196 Directories are added recursively by default. This can be avoided by
1197 setting `recursive' to False.
1198 """
1199 self._check("aw")
1200
1201 if arcname is None:
1202 arcname = name
1203
1204 # Skip if somebody tries to archive the archive...
1205 if self.name is not None \
1206 and os.path.abspath(name) == os.path.abspath(self.name):
1207 self._dbg(2, "tarfile: Skipped %r" % name)
1208 return
1209
1210 # Special case: The user wants to add the current
1211 # working directory.
1212 if name == ".":
1213 if recursive:
1214 if arcname == ".":
1215 arcname = ""
1216 for f in os.listdir("."):
1217 self.add(f, os.path.join(arcname, f))
1218 return
1219
1220 self._dbg(1, name)
1221
1222 # Create a TarInfo object from the file.
1223 tarinfo = self.gettarinfo(name, arcname)
1224
1225 if tarinfo is None:
1226 self._dbg(1, "tarfile: Unsupported type %r" % name)
1227 return
1228
1229 # Append the tar header and data to the archive.
1230 if tarinfo.isreg():
1231 f = file(name, "rb")
1232 self.addfile(tarinfo, f)
1233 f.close()
1234
1235 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1236 tarinfo.size = 0L
1237 self.addfile(tarinfo)
1238
1239 if tarinfo.isdir():
1240 self.addfile(tarinfo)
1241 if recursive:
1242 for f in os.listdir(name):
1243 self.add(os.path.join(name, f), os.path.join(arcname, f))
1244
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

    If `fileobj' is given, tarinfo.size bytes are read from it and
    written after the header. TarInfo objects can be created with
    gettarinfo(). On Windows platforms, `fileobj' should always be
    opened with mode 'rb' to avoid irritation about the file size.

    In posix mode ValueError is raised for a member that is too large
    or whose name or linkname does not fit into the header; otherwise
    GNU extensions are written for over-long names and linknames.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # Directory names carry a trailing slash.
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last slash found within the first
            # LENGTH_PREFIX + 1 characters: the part before it goes
            # into the prefix field, the rest into the name field.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            cut = head.rfind("/") + 1
            prefix = head[:cut]
            name = tarinfo.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header block.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # Write the data, if any, and pad it with NULs to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001311
1312 def extract(self, member, path=""):
1313 """Extract a member from the archive to the current working directory,
1314 using its full name. Its file information is extracted as accurately
1315 as possible. `member' may be a filename or a TarInfo object. You can
1316 specify a different directory using `path'.
1317 """
1318 self._check("r")
1319
1320 if isinstance(member, TarInfo):
1321 tarinfo = member
1322 else:
1323 tarinfo = self.getmember(member)
1324
Neal Norwitza4f651a2004-07-20 22:07:44 +00001325 # Prepare the link target for makelink().
1326 if tarinfo.islnk():
1327 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1328
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001329 try:
1330 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1331 except EnvironmentError, e:
1332 if self.errorlevel > 0:
1333 raise
1334 else:
1335 if e.filename is None:
1336 self._dbg(1, "tarfile: %s" % e.strerror)
1337 else:
1338 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1339 except ExtractError, e:
1340 if self.errorlevel > 1:
1341 raise
1342 else:
1343 self._dbg(1, "tarfile: %s" % e)
1344
def extractfile(self, member):
    """Extract a member from the archive as a file object.

    `member' may be a filename or a TarInfo object. For a regular file
    (or a member of unknown type, which is treated like one) a
    file-like object is returned. For a link, the file-like object of
    the link's target is returned. For anything else None is returned.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        member = self.getmember(member)
    tarinfo = member

    # Regular files and members of unknown type both carry data.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone
            # tries to extract a (sym)link as a file object from a
            # non-seekable stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.).
    return None
1383
1384 def _extract_member(self, tarinfo, targetpath):
1385 """Extract the TarInfo object tarinfo to a physical
1386 file called targetpath.
1387 """
1388 # Fetch the TarInfo object for the given name
1389 # and build the destination pathname, replacing
1390 # forward slashes to platform specific separators.
1391 if targetpath[-1:] == "/":
1392 targetpath = targetpath[:-1]
1393 targetpath = os.path.normpath(targetpath)
1394
1395 # Create all upper directories.
1396 upperdirs = os.path.dirname(targetpath)
1397 if upperdirs and not os.path.exists(upperdirs):
1398 ti = TarInfo()
1399 ti.name = upperdirs
1400 ti.type = DIRTYPE
1401 ti.mode = 0777
1402 ti.mtime = tarinfo.mtime
1403 ti.uid = tarinfo.uid
1404 ti.gid = tarinfo.gid
1405 ti.uname = tarinfo.uname
1406 ti.gname = tarinfo.gname
1407 try:
1408 self._extract_member(ti, ti.name)
1409 except:
1410 pass
1411
1412 if tarinfo.islnk() or tarinfo.issym():
1413 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1414 else:
1415 self._dbg(1, tarinfo.name)
1416
1417 if tarinfo.isreg():
1418 self.makefile(tarinfo, targetpath)
1419 elif tarinfo.isdir():
1420 self.makedir(tarinfo, targetpath)
1421 elif tarinfo.isfifo():
1422 self.makefifo(tarinfo, targetpath)
1423 elif tarinfo.ischr() or tarinfo.isblk():
1424 self.makedev(tarinfo, targetpath)
1425 elif tarinfo.islnk() or tarinfo.issym():
1426 self.makelink(tarinfo, targetpath)
1427 elif tarinfo.type not in SUPPORTED_TYPES:
1428 self.makeunknown(tarinfo, targetpath)
1429 else:
1430 self.makefile(tarinfo, targetpath)
1431
1432 self.chown(tarinfo, targetpath)
1433 if not tarinfo.issym():
1434 self.chmod(tarinfo, targetpath)
1435 self.utime(tarinfo, targetpath)
1436
1437 #--------------------------------------------------------------------------
1438 # Below are the different file methods. They are called via
1439 # _extract_member() when extract() is called. They can be replaced in a
1440 # subclass to implement other functionality.
1441
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that already
    exists is not an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # The active exception is fetched through sys.exc_info(), so
        # no version specific binding syntax is needed in the handler.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1450
def makefile(self, tarinfo, targetpath):
    """Create the file `targetpath' and fill it with the data of the
    member `tarinfo'.

    Both the member's file object and the target file are closed even
    if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() is used on purpose: the module rebinds the name open.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1459
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were
    a regular file and leave a debug message about it.
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, " \
           "extracted as regular file." % tarinfo.type
    self._dbg(1, note)
1467
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'. ExtractError is raised if
    the platform offers no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1475
def makedev(self, tarinfo, targetpath):
    """Create a character or block device called `targetpath'.
    ExtractError is raised if the platform lacks os.mknod() or
    os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)

    os.mknod(targetpath, tarinfo.mode | devtype, device)
1490
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called `targetpath'.

    If the link cannot be created because an attribute is missing
    (e.g. the platform has no os.symlink()/os.link()), a copy of the
    referenced file is made instead: first by extracting the
    referenced member from the archive, then by copying the file from
    the filesystem. IOError is raised if that fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # A symlink target is relative to the link's directory.
            linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                             linkpath))

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Not available from the archive; try the filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1517
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

    Nothing happens unless the pwd module is available and the
    effective user id is 0. Group and user are looked up by name first,
    then by numeric id; if both fail, the ids of the current process
    are used. ExtractError is raised if changing the owner fails.
    """
    # We have to be root to change ownership.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1545
def chmod(self, tarinfo, targetpath):
    """Set the permissions of `targetpath' to tarinfo.mode. Nothing is
    done on platforms without os.chmod(). ExtractError is raised if the
    mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001554
def utime(self, tarinfo, targetpath):
    """Set the access and modification time of `targetpath' to
    tarinfo.mtime. ExtractError is raised if the time cannot be set.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES) to use
    # utime() on directories.
    if tarinfo.isdir() and sys.platform == "win32":
        return
    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1568
1569 #--------------------------------------------------------------------------
1570
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    ReadError is raised if the very first block of the archive is not a
    valid header (and ignore_zeros is not set). With ignore_zeros set,
    empty and invalid blocks are skipped.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            # Block is empty or unreadable.
            if self.offset == 0:
                # If the first block is invalid, this does not look
                # like a tar archive we can handle.
                raise ReadError("empty, unreadable or compressed file")
            return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Some old tar programs don't know DIRTYPE and mark a directory as
    # a regular file whose name ends with a slash. Test the LAST
    # character: name[-1:], not name[:-1].
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        tarinfo.type = DIRTYPE

    self._record_member(tarinfo)
    return tarinfo
1637
1638 #--------------------------------------------------------------------------
1639 # Below are some methods which are called for special typeflags in the
1640 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1641 # are registered in TYPE_METH below. You can register your own methods
1642 # with this mapping.
1643 # A registered method is called with a TarInfo object as only argument.
1644 #
1645 # During its execution the method MUST perform the following tasks:
1646 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1647 # if there is data to follow.
1648 # 2. set self.offset to the position where the next member's header will
1649 # begin.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001650 # 3. call self._record_member() if the tarinfo object is supposed to
1651 # appear as a member of the TarFile object.
1652 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001653
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname or longlink member.

    The data blocks following the header `tarinfo' contain the long
    name as a NUL terminated string. It replaces the name (or linkname)
    of the member described by the next header, which is returned.
    None is returned if the archive ends before that header.
    """
    # Collect the data blocks that hold the long name.
    chunks = []
    count = tarinfo.size
    while count > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header: the member the long name belongs to.
    member = self.next()
    if member is None:
        # Truncated archive: the announced member is missing.
        return None

    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
        # next() has already recorded the member under the truncated
        # name from its header; keep the name list in sync so that
        # getmember() and getnames() use the long name.
        if self.members and self.members[-1] is member:
            self.membernames[-1] = member.name
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001676
1677 def proc_sparse(self, tarinfo):
1678 """Analyze a GNU sparse header plus extra headers.
1679 """
1680 buf = tarinfo.tobuf()
1681 sp = _ringbuffer()
1682 pos = 386
1683 lastpos = 0L
1684 realpos = 0L
1685 # There are 4 possible sparse structs in the
1686 # first header.
1687 for i in xrange(4):
1688 try:
1689 offset = int(buf[pos:pos + 12], 8)
1690 numbytes = int(buf[pos + 12:pos + 24], 8)
1691 except ValueError:
1692 break
1693 if offset > lastpos:
1694 sp.append(_hole(lastpos, offset - lastpos))
1695 sp.append(_data(offset, numbytes, realpos))
1696 realpos += numbytes
1697 lastpos = offset + numbytes
1698 pos += 24
1699
1700 isextended = ord(buf[482])
1701 origsize = int(buf[483:495], 8)
1702
1703 # If the isextended flag is given,
1704 # there are extra headers to process.
1705 while isextended == 1:
1706 buf = self.fileobj.read(BLOCKSIZE)
1707 self.offset += BLOCKSIZE
1708 pos = 0
1709 for i in xrange(21):
1710 try:
1711 offset = int(buf[pos:pos + 12], 8)
1712 numbytes = int(buf[pos + 12:pos + 24], 8)
1713 except ValueError:
1714 break
1715 if offset > lastpos:
1716 sp.append(_hole(lastpos, offset - lastpos))
1717 sp.append(_data(offset, numbytes, realpos))
1718 realpos += numbytes
1719 lastpos = offset + numbytes
1720 pos += 24
1721 isextended = ord(buf[504])
1722
1723 if lastpos < origsize:
1724 sp.append(_hole(lastpos, origsize - lastpos))
1725
1726 tarinfo.sparse = sp
1727
1728 tarinfo.offset_data = self.offset
1729 self.offset += self._block(tarinfo.size)
1730 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001731
1732 self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001733 return tarinfo
1734
1735 # The type mapping for the next() method. The keys are single character
1736 # strings, the typeflag. The values are methods which are called when
1737 # next() encounters such a typeflag.
1738 TYPE_METH = {
1739 GNUTYPE_LONGNAME: proc_gnulong,
1740 GNUTYPE_LONGLINK: proc_gnulong,
1741 GNUTYPE_SPARSE: proc_sparse
1742 }
1743
1744 #--------------------------------------------------------------------------
1745 # Little helper methods:
1746
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
    e.g. _block(834) => 1024.
    """
    full, rest = divmod(count, BLOCKSIZE)
    if not rest:
        return full * BLOCKSIZE
    return (full + 1) * BLOCKSIZE
1755
1756 def _getmember(self, name, tarinfo=None):
1757 """Find an archive member by name from bottom to top.
1758 If tarinfo is given, it is used as the starting point.
1759 """
1760 if tarinfo is None:
1761 end = len(self.members)
1762 else:
1763 end = self.members.index(tarinfo)
1764
1765 for i in xrange(end - 1, -1, -1):
1766 if name == self.membernames[i]:
1767 return self.members[i]
1768
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001769 def _record_member(self, tarinfo):
1770 """Record a tarinfo object in the internal datastructures.
1771 """
1772 self.members.append(tarinfo)
1773 self.membernames.append(tarinfo.name)
1774
def _load(self):
    """Read through the entire archive file, so that all readable
    members get recorded, and mark the TarFile as loaded.
    """
    # next() returns None as soon as no further member is available.
    while self.next() is not None:
        pass
    self._loaded = True
1784
1785 def _check(self, mode=None):
1786 """Check if TarFile is still open, and if the operation's mode
1787 corresponds to TarFile's mode.
1788 """
1789 if self.closed:
1790 raise IOError, "%s is closed" % self.__class__.__name__
1791 if mode is not None and self._mode not in mode:
1792 raise IOError, "bad operation for mode %r" % self._mode
1793
1794 def __iter__(self):
1795 """Provide an iterator object.
1796 """
1797 if self._loaded:
1798 return iter(self.members)
1799 else:
1800 return TarIter(self)
1801
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

    It consists of an extended tar header, with the length of the
    NUL terminated longname as size, followed by data blocks which
    contain the longname, padded with NULs to a full block.
    """
    name += NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(name)

    # Write the extended header.
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # Write the name and fill up the last block.
    self.fileobj.write(name)
    padding = (BLOCKSIZE - header.size % BLOCKSIZE) % BLOCKSIZE
    if padding > 0:
        self.fileobj.write(NUL * padding)
    self.offset += header.size + padding
1826
def _dbg(self, level, msg):
    """Write the debugging message `msg' to sys.stderr if `level' does
    not exceed self.debug.
    """
    if self.debug >= level:
        print >> sys.stderr, msg
1832# class TarFile
1833
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the TarFile `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator object itself.
        """
        return self

    def next(self):
        """Return the next member using TarFile's next() method.
        When all members have been read, mark the TarFile as _loaded
        and stop the iteration.
        """
        tarinfo = self.tarfile.next()
        if tarinfo:
            return tarinfo
        self.tarfile._loaded = True
        raise StopIteration
1858
1859# Helper classes for sparse file support
1860class _section:
1861 """Base class for _data and _hole.
1862 """
1863 def __init__(self, offset, size):
1864 self.offset = offset
1865 self.size = size
1866 def __contains__(self, offset):
1867 return self.offset <= offset < self.offset + self.size
1868
class _data(_section):
    """Represent a data section in a sparse file. In addition to
    offset and size it stores `realpos', which proc_sparse() sets to
    the running total of the preceding data sections' sizes.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1875
class _hole(_section):
    """Represent a hole section in a sparse file. It adds nothing to
    _section; the class only serves to tell holes from data.
    """
1880
1881class _ringbuffer(list):
1882 """Ringbuffer class which increases performance
1883 over a regular list.
1884 """
1885 def __init__(self):
1886 self.idx = 0
1887 def find(self, offset):
1888 idx = self.idx
1889 while True:
1890 item = self[idx]
1891 if offset in item:
1892 break
1893 idx += 1
1894 if idx == len(self):
1895 idx = 0
1896 if idx == self.idx:
1897 # End of File
1898 return None
1899 self.idx = idx
1900 return item
1901
1902#---------------------------------------------
1903# zipfile compatible TarFile class
1904#---------------------------------------------
1905TAR_PLAIN = 0 # zipfile.ZIP_STORED
1906TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
    ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Open the archive plain or gzip compressed.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            # Give every member the attributes of a zipfile.ZipInfo.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        # Names of the members returned by infolist().
        return [m.name for m in self.infolist()]

    def infolist(self):
        # Only members with a type in REGULAR_TYPES are listed.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        # No check is performed; always returns None.
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # `compress_type' is accepted but not used.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the ZipInfo style attributes to TarInfo ones.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
1949#class TarFileCompat
1950
1951#--------------------
1952# exported functions
1953#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # Opening the archive is the test; a TarError means it failed.
    try:
        open(name).close()
    except TarError:
        return False
    return True
1964
1965open = TarFile.open