blob: 9c25962b893ad03f94081be9d88a0b433759c260 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1    # maximum size of a file (11 octal digits,
                                # i.e. 077777777777)

# Member type flags as stored in the one-byte type field of a header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
# File type field: a 4-bit code stored above the 12 permission bits.
S_IFLNK = 10 << 12      # symbolic link     (0120000 octal)
S_IFREG = 8 << 12       # regular file      (0100000 octal)
S_IFBLK = 6 << 12       # block device      (0060000 octal)
S_IFDIR = 4 << 12       # directory         (0040000 octal)
S_IFCHR = 2 << 12       # character device  (0020000 octal)
S_IFIFO = 1 << 12       # fifo              (0010000 octal)

TSUID = 1 << 11         # set UID on execution  (04000 octal)
TSGID = 1 << 10         # set GID on execution  (02000 octal)
TSVTX = 1 << 9          # reserved              (01000 octal)

TUREAD = 1 << 8         # read by owner             (0400 octal)
TUWRITE = 1 << 7        # write by owner            (0200 octal)
TUEXEC = 1 << 6         # execute/search by owner   (0100 octal)
TGREAD = 1 << 5         # read by group             (0040 octal)
TGWRITE = 1 << 4        # write by group            (0020 octal)
TGEXEC = 1 << 3         # execute/search by group   (0010 octal)
TOREAD = 1 << 2         # read by other             (0004 octal)
TOWRITE = 1 << 1        # write by other            (0002 octal)
TOEXEC = 1 << 0         # execute/search by other   (0001 octal)
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    The result is everything before the first NUL byte; if the buffer
    holds no NUL at all it is returned unchanged.  Cutting at the first
    NUL (rather than stripping trailing NULs only) keeps stale bytes
    that follow the terminator inside a fixed-width field out of the
    result.
    """
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Calculate the checksum for a member's header.

    buf is the header as a string buffer (normally 512 bytes).  The
    result is the plain sum of all byte values, where the eight bytes
    of the chksum field itself (offsets 148..155) count as blanks.
    """
    blanks = 256                            # 8 * ord(" ")
    before = sum(map(ord, buf[:148]))       # bytes in front of chksum
    after = sum(map(ord, buf[156:]))        # bytes behind chksum
    return blanks + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None the complete remaining content of src is copied
    (delegated to shutil.copyfileobj); with length 0 nothing happens.
    Otherwise the data is moved in 16 KiB pieces and IOError is raised
    as soon as src delivers fewer bytes than requested.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    full_blocks, tail = divmod(length, BUFSIZE)

    # All complete buffers first ...
    copied = 0
    while copied < full_blocks:
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
        copied += 1

    # ... then whatever is left over.
    if tail != 0:
        chunk = src.read(tail)
        if len(chunk) < tail:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
175
# One entry per output character of filemode().  Each entry lists
# (bitmask, character) alternatives in priority order; the first
# alternative whose bits are all set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        symbol = "-"                    # shown when no alternative matches
        for bit, char in alternatives:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000217
# Member names inside an archive always use "/" as separator, so on
# platforms with a different os.sep the normalized path is converted.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
222
class TarError(Exception):
    """Common base class of all tarfile exceptions."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
238
239#---------------------------
240# internal stream interface
241#---------------------------
242class _LowLevelFile:
243 """Low-level file object. Supports reading and writing.
244 It is used instead of a regular file object for streaming
245 access.
246 """
247
248 def __init__(self, name, mode):
249 mode = {
250 "r": os.O_RDONLY,
251 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
252 }[mode]
253 if hasattr(os, "O_BINARY"):
254 mode |= os.O_BINARY
255 self.fd = os.open(name, mode)
256
257 def close(self):
258 os.close(self.fd)
259
260 def read(self, size):
261 return os.read(self.fd, size)
262
263 def write(self, s):
264 os.write(self.fd, s)
265
266class _Stream:
267 """Class that serves as an adapter between TarFile and
268 a stream-like object. The stream-like object only
269 needs to have a read() or write() method and is accessed
270 blockwise. Use of gzip or bzip2 compression is possible.
271 A stream-like object could be for example: sys.stdin,
272 sys.stdout, a socket, a tape device etc.
273
274 _Stream is intended to be used only internally.
275 """
276
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000277 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000278 """Construct a _Stream object.
279 """
280 self._extfileobj = True
281 if fileobj is None:
282 fileobj = _LowLevelFile(name, mode)
283 self._extfileobj = False
284
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000285 if comptype == '*':
286 # Enable transparent compression detection for the
287 # stream interface
288 fileobj = _StreamProxy(fileobj)
289 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000290
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000291 self.name = name or ""
292 self.mode = mode
293 self.comptype = comptype
294 self.fileobj = fileobj
295 self.bufsize = bufsize
296 self.buf = ""
297 self.pos = 0L
298 self.closed = False
299
300 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000301 try:
302 import zlib
303 except ImportError:
304 raise CompressionError, "zlib module is not available"
305 self.zlib = zlib
306 self.crc = zlib.crc32("")
307 if mode == "r":
308 self._init_read_gz()
309 else:
310 self._init_write_gz()
311
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000312 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000313 try:
314 import bz2
315 except ImportError:
316 raise CompressionError, "bz2 module is not available"
317 if mode == "r":
318 self.dbuf = ""
319 self.cmp = bz2.BZ2Decompressor()
320 else:
321 self.cmp = bz2.BZ2Compressor()
322
323 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000324 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325 self.close()
326
327 def _init_write_gz(self):
328 """Initialize for writing with gzip compression.
329 """
330 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
331 -self.zlib.MAX_WBITS,
332 self.zlib.DEF_MEM_LEVEL,
333 0)
334 timestamp = struct.pack("<L", long(time.time()))
335 self.__write("\037\213\010\010%s\002\377" % timestamp)
336 if self.name.endswith(".gz"):
337 self.name = self.name[:-3]
338 self.__write(self.name + NUL)
339
340 def write(self, s):
341 """Write string s to the stream.
342 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000343 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000344 self.crc = self.zlib.crc32(s, self.crc)
345 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000346 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000347 s = self.cmp.compress(s)
348 self.__write(s)
349
350 def __write(self, s):
351 """Write string s to the stream if a whole new block
352 is ready to be written.
353 """
354 self.buf += s
355 while len(self.buf) > self.bufsize:
356 self.fileobj.write(self.buf[:self.bufsize])
357 self.buf = self.buf[self.bufsize:]
358
359 def close(self):
360 """Close the _Stream object. No operation should be
361 done on it afterwards.
362 """
363 if self.closed:
364 return
365
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000366 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000367 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000368
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000369 if self.mode == "w" and self.buf:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000370 blocks, remainder = divmod(len(self.buf), self.bufsize)
371 if remainder > 0:
372 self.buf += NUL * (self.bufsize - remainder)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000373 self.fileobj.write(self.buf)
374 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000375 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000377 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000378
379 if not self._extfileobj:
380 self.fileobj.close()
381
382 self.closed = True
383
384 def _init_read_gz(self):
385 """Initialize for reading a gzip compressed fileobj.
386 """
387 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
388 self.dbuf = ""
389
390 # taken from gzip.GzipFile with some alterations
391 if self.__read(2) != "\037\213":
392 raise ReadError, "not a gzip file"
393 if self.__read(1) != "\010":
394 raise CompressionError, "unsupported compression method"
395
396 flag = ord(self.__read(1))
397 self.__read(6)
398
399 if flag & 4:
400 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
401 self.read(xlen)
402 if flag & 8:
403 while True:
404 s = self.__read(1)
405 if not s or s == NUL:
406 break
407 if flag & 16:
408 while True:
409 s = self.__read(1)
410 if not s or s == NUL:
411 break
412 if flag & 2:
413 self.__read(2)
414
415 def tell(self):
416 """Return the stream's file pointer position.
417 """
418 return self.pos
419
420 def seek(self, pos=0):
421 """Set the stream's file pointer to pos. Negative seeking
422 is forbidden.
423 """
424 if pos - self.pos >= 0:
425 blocks, remainder = divmod(pos - self.pos, self.bufsize)
426 for i in xrange(blocks):
427 self.read(self.bufsize)
428 self.read(remainder)
429 else:
430 raise StreamError, "seeking backwards is not allowed"
431 return self.pos
432
433 def read(self, size=None):
434 """Return the next size number of bytes from the stream.
435 If size is not defined, return all bytes of the stream
436 up to EOF.
437 """
438 if size is None:
439 t = []
440 while True:
441 buf = self._read(self.bufsize)
442 if not buf:
443 break
444 t.append(buf)
445 buf = "".join(t)
446 else:
447 buf = self._read(size)
448 self.pos += len(buf)
449 return buf
450
451 def _read(self, size):
452 """Return size bytes from the stream.
453 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000454 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000455 return self.__read(size)
456
457 c = len(self.dbuf)
458 t = [self.dbuf]
459 while c < size:
460 buf = self.__read(self.bufsize)
461 if not buf:
462 break
463 buf = self.cmp.decompress(buf)
464 t.append(buf)
465 c += len(buf)
466 t = "".join(t)
467 self.dbuf = t[size:]
468 return t[:size]
469
470 def __read(self, size):
471 """Return size bytes from stream. If internal buffer is empty,
472 read another block from the stream.
473 """
474 c = len(self.buf)
475 t = [self.buf]
476 while c < size:
477 buf = self.fileobj.read(self.bufsize)
478 if not buf:
479 break
480 t.append(buf)
481 c += len(buf)
482 t = "".join(t)
483 self.buf = t[size:]
484 return t[:size]
485# class _Stream
486
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000487class _StreamProxy(object):
488 """Small proxy class that enables transparent compression
489 detection for the Stream interface (mode 'r|*').
490 """
491
492 def __init__(self, fileobj):
493 self.fileobj = fileobj
494 self.buf = self.fileobj.read(BLOCKSIZE)
495
496 def read(self, size):
497 self.read = self.fileobj.read
498 return self.buf
499
500 def getcomptype(self):
501 if self.buf.startswith("\037\213\010"):
502 return "gz"
503 if self.buf.startswith("BZh91"):
504 return "bz2"
505 return "tar"
506
507 def close(self):
508 self.fileobj.close()
509# class StreamProxy
510
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000511#------------------------
512# Extraction file object
513#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member tarinfo of tarfile.

        The data is read from tarfile.fileobj, starting at
        tarinfo.offset_data and covering tarinfo.size bytes.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0            # position inside the member
        self.linebuffer = ""    # read-ahead used by readline()
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

        Carriage returns directly in front of the newline are dropped.
        """
        # NOTE(review): with a size smaller than the position of the
        # newline already buffered, the character at that index is
        # dropped and a newline appended -- behavior kept as is.
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            nl = min(nl, size)
        else:
            size -= len(self.linebuffer)
            while (nl < 0 and size > 0):
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

        size None or negative means "everything up to the end of the
        member".  Raises ValueError if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        # A negative size must not reach the arithmetic below: it
        # would move pos backwards and make the underlying read()
        # return data beyond the end of this member.
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

        size None or negative means "everything up to the end of the
        member".  Raises ValueError if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data section: translate the logical position into the
            # position of the data stored in the archive.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is a hole and reads as NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

        whence is 0 (absolute), 1 (relative to the current position)
        or 2 (relative to the end).  The result is clamped to the
        range 0..size; the readline() buffer is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
670
671#------------------
672# Exported Classes
673#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name            # member name (dirnames must end with '/')
        self.mode = int("666", 8)   # file permissions (rw-rw-rw-)
        self.uid = 0                # user id
        self.gid = 0                # group id
        self.size = 0               # file size
        self.mtime = 0              # modification time
        self.chksum = 0             # header checksum
        self.type = REGTYPE         # member type
        self.linkname = ""          # link name
        self.uname = "user"         # user name
        self.gname = "group"        # group name
        self.devmajor = 0           #-
        self.devminor = 0           #-for use with CHRTYPE and BLKTYPE
        self.prefix = ""            # prefix to filename or holding information
                                    # about sparse files

        self.offset = 0             # the tar header starts here
        self.offset_data = 0        # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

        Raises ValueError if one of the mandatory numeric fields does
        not hold an octal number.
        """
        tarinfo = cls()
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = int(buf[100:108], 8)
        tarinfo.uid = int(buf[108:116],8)
        tarinfo.gid = int(buf[116:124],8)

        # There are two possible codings for the size field we
        # have to discriminate, see comment in tobuf() below.
        if buf[124] != "\200":
            tarinfo.size = long(buf[124:136], 8)
        else:
            # marker byte 0200 followed by an 88-bit big-endian number
            tarinfo.size = 0
            for i in range(11):
                tarinfo.size <<= 8
                tarinfo.size += ord(buf[125 + i])

        tarinfo.mtime = long(buf[136:148], 8)
        tarinfo.chksum = int(buf[148:156], 8)
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        try:
            tarinfo.devmajor = int(buf[329:337], 8)
            tarinfo.devminor = int(buf[337:345], 8)
        except ValueError:
            # The device fields are optional; fall back to 0 for both
            # of them if either one cannot be parsed.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        # (For GNU sparse members the field holds sparse information
        # instead, so it must not be joined to the name.)
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    def tobuf(self):
        """Return a tar header block as a 512 byte string.

        The block is also stored in self.buf.
        """
        # Prefer the size to be encoded as 11 octal ascii digits
        # which is the most portable. If the size exceeds this
        # limit (>= 8 GB), encode it as an 88-bit value which is
        # a GNU tar feature.
        if self.size <= MAXSIZE_MEMBER:
            size = "%011o" % self.size
        else:
            s = self.size
            size = ""
            for i in range(11):
                size = chr(s & 0xFF) + size
                s >>= 8
            size = "\200" + size

        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (self.name, 100),
                ("%07o" % (self.mode & 0xFFF), 8),  # permission bits only
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                (size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),                    # chksum placeholder
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # truncate to the field size or pad with NULs
            l = len(value)
            parts.append(value[:fieldsize] + (fieldsize - l) * NUL)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # six octal digits and a NUL; the last of the eight
        # placeholder blanks stays in place
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
821
822class TarFile(object):
823 """The TarFile Class provides an interface to tar archives.
824 """
825
826 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
827
828 dereference = False # If true, add content of linked file to the
829 # tar file, else the link.
830
831 ignore_zeros = False # If true, skips empty or invalid blocks and
832 # continues processing.
833
834 errorlevel = 0 # If 0, fatal errors only appear in debug
835 # messages (if debug >= 0). If > 0, errors
836 # are passed to the caller as exceptions.
837
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000838 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000839 # archives (no GNU extensions!)
840
841 fileobject = ExFileObject
842
843 def __init__(self, name=None, mode="r", fileobj=None):
844 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
845 read from an existing archive, 'a' to append data to an existing
846 file or 'w' to create a new file overwriting an existing one. `mode'
847 defaults to 'r'.
848 If `fileobj' is given, it is used for reading or writing data. If it
849 can be determined, `mode' is overridden by `fileobj's mode.
850 `fileobj' is not closed, when TarFile is closed.
851 """
Martin v. Löwisbc3b0602005-08-24 06:06:52 +0000852 self.name = os.path.abspath(name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000853
854 if len(mode) > 1 or mode not in "raw":
855 raise ValueError, "mode must be 'r', 'a' or 'w'"
856 self._mode = mode
857 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
858
859 if not fileobj:
860 fileobj = file(self.name, self.mode)
861 self._extfileobj = False
862 else:
863 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisbc3b0602005-08-24 06:06:52 +0000864 self.name = os.path.abspath(fileobj.name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000865 if hasattr(fileobj, "mode"):
866 self.mode = fileobj.mode
867 self._extfileobj = True
868 self.fileobj = fileobj
869
870 # Init datastructures
871 self.closed = False
872 self.members = [] # list of members as TarInfo objects
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 self._loaded = False # flag if all members have been read
874 self.offset = 0L # current position in the archive file
875 self.inodes = {} # dictionary caching the inodes of
876 # archive members already added
877
878 if self._mode == "r":
879 self.firstmember = None
880 self.firstmember = self.next()
881
882 if self._mode == "a":
883 # Move to the end of the archive,
884 # before the first empty block.
885 self.firstmember = None
886 while True:
887 try:
888 tarinfo = self.next()
889 except ReadError:
890 self.fileobj.seek(0)
891 break
892 if tarinfo is None:
893 self.fileobj.seek(- BLOCKSIZE, 1)
894 break
895
896 if self._mode in "aw":
897 self._loaded = True
898
899 #--------------------------------------------------------------------------
900 # Below are the classmethods which act as alternate constructors to the
901 # TarFile class. The open() method is the only one that is needed for
902 # public use; it is the "super"-constructor and is able to select an
903 # adequate "sub"-constructor for a particular compression using the mapping
904 # from OPEN_METH.
905 #
906 # This concept allows one to subclass TarFile without losing the comfort of
907 # the super-constructor. A sub-constructor is registered and made available
908 # by adding it to the mapping in OPEN_METH.
909
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError for an unusable mode or if neither name nor
       fileobj is given, CompressionError for an unknown compression
       type and ReadError if no opener accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Every opener that is tried reads from fileobj.  Remember
        # where it stands so that a failed attempt can be undone
        # before the next opener has its turn.
        saved_pos = None
        if fileobj is not None:
            try:
                saved_pos = fileobj.tell()
            except (AttributeError, IOError):
                saved_pos = None

        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if saved_pos is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream object is owned by the TarFile and closed with it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
977
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; any other value raises
       ValueError. The archive object is created by calling
       cls(name, mode, fileobj) and returned.
    """
    # Compare against whole mode strings. A substring test such as
    # `mode not in "raw"' would let the empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
985
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r' or 'w' (ValueError otherwise). If `fileobj'
       is None, the file `name' is opened here in binary mode. Raises
       CompressionError if the gzip module cannot be used and ReadError
       if opening the archive fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name passed to GzipFile: "x.tgz" -> "x.tar",
    # "x.tar.gz" -> "x.tar". Without a name (fileobj only) there is
    # nothing to derive it from.
    if name is None:
        tarname = ""
    else:
        pre, ext = os.path.splitext(name)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = os.path.basename(pre + ext)

    # Remember whether the raw file belongs to us, so that it can be
    # closed again if the archive cannot be opened.
    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(tarname, mode, compresslevel, fileobj))
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure: release our file, then let the original
        # exception propagate unchanged.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1018
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r' or 'w' (ValueError otherwise). External file
       objects are not supported: passing `fileobj' raises ValueError.
       Raises CompressionError if the bz2 module is missing and
       ReadError if opening the archive fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if fileobj is not None:
        raise ValueError("no support for external file objects")

    bzfile = None
    try:
        bzfile = bz2.BZ2File(name, mode, compresslevel=compresslevel)
        t = cls.taropen(name, mode, bzfile)
    except IOError:
        if bzfile is not None:
            bzfile.close()
        raise ReadError("not a bzip2 file")
    except:
        # Do not leave the compressed file open when the archive
        # could not be opened; re-raise the original exception.
        if bzfile is not None:
            bzfile.close()
        raise
    t._extfileobj = False
    return t
1041
# All *open() methods are registered here.
# Keys are compression type names (the part of the mode string after
# the ":"), values are the names of the classmethods that open such an
# archive. open() looks the method up with getattr(cls, ...) and, for
# mode "r" / "r:*", tries every entry in turn.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1048
1049 #--------------------------------------------------------------------------
1050 # The public methods which TarFile provides:
1051
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed unless it was supplied by the
       caller (self._extfileobj). This cleanup also happens when
       writing the trailer fails; the write error is then re-raised.
    """
    if self.closed:
        return

    try:
        if self._mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Mark as closed first, so that a failing fileobj.close()
        # cannot lead to a second attempt to write the trailer.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1071
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised if the archive has no such member. When a
       name occurs several times, the last occurrence wins, as it is
       assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001082
def getmembers(self):
    """Return all archive members as a list of TarInfo objects, in the
       order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list is only available after the
    # whole archive has been scanned once.
    self._load()
    return self.members
1092
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001098
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that cannot be stored
       (not a regular file, directory, fifo, symlink or device).
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if (not self.dereference and inode in self.inodes
                and arcname != self.inodes[inode]):
            # It is a hardlink to an already archived file.
            # A file that is added a second time under the very
            # same arcname is NOT a link: a member linking to
            # itself could never be extracted.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    if stat.S_ISDIR(stmd):
        # For a directory, the size must be 0
        tarinfo.size = 0
    else:
        tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    # User and group names are optional: they are only filled in if
    # the pwd/grp modules are usable and know the numeric id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1198
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.

       One line is printed per member. The trailing commas on the print
       statements keep all fields of a member on the same line; the
       bare print at the end of the loop body terminates that line.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # permission string, then owner/group (names are preferred,
            # numeric ids are the fallback)
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # devices show "major,minor" in place of a size
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # modification time as YYYY-MM-DD hh:mm:ss in local time
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
1227
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive itself and files of unsupported types are skipped
       with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None:
        # os.path.samefile is not available on every platform and
        # raises OSError if one of the paths cannot be stat'ed. Fall
        # back to comparing absolute paths in the first case; in the
        # second case the two cannot be the same existing file.
        if hasattr(os.path, "samefile"):
            try:
                is_archive = os.path.samefile(name, self.name)
            except OSError:
                is_archive = False
        else:
            is_archive = (os.path.abspath(name) ==
                          os.path.abspath(self.name))
        if is_archive:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the input file even if writing it failed.
            f.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        # These members consist of a header only.
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))
1279
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       Note that `tarinfo' is modified in place: its name and linkname
       are normalized and, if too long, shortened or split.
       In posix mode ValueError is raised for members that are too
       large or whose name/linkname does not fit into the header.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError, "file is too large (>= 8 GB)"
        else:
            self._dbg(2, "tarfile: Created GNU tar largefile header")


    # Overlong link names: an error in posix mode, otherwise the full
    # name is written as a separate GNU LONGLINK record in front of
    # the header, which itself keeps a truncated copy.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError, "linkname is too long (>%d)" \
                              % (LENGTH_LINK)
        else:
            self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
            tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at a "/" into prefix and name: take the
            # first LENGTH_PREFIX + 1 characters and cut them back to
            # the last "/" they contain.
            prefix = tarinfo.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            name = tarinfo.name[len(prefix):]
            # drop the separating "/" from the prefix
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError, "name is too long (>%d)" \
                                  % (LENGTH_NAME)

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            # Same scheme as for LONGLINK above.
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header block and account for it.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Pad the data with NULs up to a multiple of BLOCKSIZE.
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001346
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       Errors while setting directory attributes are re-raised if
       self.errorlevel > 1 and reported as debug messages otherwise.
    """
    directories = []

    if members is None:
        members = self

    # rwx for user, group and others (octal 777)
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                # Best effort, e.g. the directory exists already.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable here: `path' must stay the extraction
        # root for every directory, it must not accumulate the names.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1387
def extract(self, member, path=""):
    """Extract one member to the current working directory, or below
       `path' if that is given, using the member's full name. `member'
       may be a filename or a TarInfo object. File information is
       restored as accurately as possible.

       Depending on self.errorlevel, errors are either re-raised or
       only reported as debug messages.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(member)

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1420
def extractfile(self, member):
    """Return a read-only file-like object for a member. `member' may
       be a filename or a TarInfo object. Regular files (and members of
       unknown type, which are treated like regular files) yield a
       file-like object for their data, links yield the file-like
       object of their target. For anything else (directories, devices,
       etc.) None is returned.
       The file-like object provides the following methods:
       read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Data members: regular files and everything we do not know.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Members without data (directory, chrdev, blkdev, etc.).
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Resolving a (sym)link requires looking up its target, which is
    # not possible on a non-seekable stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1459
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.

       Missing parent directories are created first (best effort).
       Afterwards owner, permissions and modification time are set;
       permissions and time are skipped for symbolic links.
    """
    # Fetch the TarInfo object for the given name
    # and build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # Build a directory member that inherits owner and mtime
        # from the member being extracted.
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        ti.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO  # octal 777
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        try:
            self._extract_member(ti, ti.name)
        except (KeyboardInterrupt, SystemExit):
            # Never swallow a user interrupt or interpreter exit.
            raise
        except:
            # Best effort: if the directory is really missing, the
            # extraction of the member itself will fail below.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch on the member type to the make*() methods.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1512
1513 #--------------------------------------------------------------------------
1514 # Below are the different file methods. They are called via
1515 # _extract_member() when extract() is called. They can be replaced in a
1516 # subclass to implement other functionality.
1517
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. An already existing
       directory is not an error; other failures are re-raised.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1526
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained with extractfile() and copied to
       targetpath, which is opened in binary mode. Both file objects
       are closed again, also if opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1535
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were
       a regular file and report that as a debug message.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, extracted as regular file."
               % tarinfo.type)
    self._dbg(1, message)
1543
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath. ExtractError is raised
       if the os module does not provide mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1551
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at targetpath.
       ExtractError is raised if the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | devtype, device)
1566
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       The fallback is tried in two steps: first the link target is
       extracted from the archive to targetpath, then, if that fails,
       the target is copied from the file system. IOError is raised if
       both fail.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Raised when the attribute lookup fails, i.e. os.symlink /
        # os.link (or tarinfo._link_target) does not exist.
        if tarinfo.issym():
            # A symlink's target is relative to the directory
            # of the link itself.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            # First choice: extract the link target from the archive.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            # Second choice: copy the target from the file system.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError, "link could not be created"
1593
def chown(self, tarinfo, targetpath):
    """Change the owner of targetpath to the one recorded in tarinfo.
       Nothing is done unless the process runs with effective uid 0.
       Names take precedence over numeric ids; if neither is known to
       the system, the ids of the current process are used.
       ExtractError is raised if the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1621
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Does nothing if the os module has no chmod(). ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001630
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.
       Does nothing if the os module has no utime(), or for directories
       on win32. ExtractError is raised if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1644
1645 #--------------------------------------------------------------------------
1646
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised if the very first block of the archive
       cannot be interpreted as a tar header.
    """
    self._check("ra")
    # A member that has been read ahead is handed out first.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip over empty and invalid blocks.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Test the LAST character of the name: name[-1:], not name[:-1]
    # (which would be the name without its last character).
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        # some old tar programs don't know DIRTYPE
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    return tarinfo
1713
1714 #--------------------------------------------------------------------------
1715 # Below are some methods which are called for special typeflags in the
1716 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1717 # are registered in TYPE_METH below. You can register your own methods
1718 # with this mapping.
1719 # A registered method is called with a TarInfo object as only argument.
1720 #
1721 # During its execution the method MUST perform the following tasks:
1722 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1723 # if there is data to follow.
1724 # 2. set self.offset to the position where the next member's header will
1725 # begin.
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001726 # 3. append the tarinfo object to self.members, if it is supposed to appear
1727 # as a member of the TarFile object.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001728 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001729
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member.

       The data blocks following `tarinfo' contain the long name. It is
       applied to the member described by the next header, which is
       returned. None is returned if the archive ends before that
       header.
    """
    # Collect the data blocks that hold the long name.
    chunks = []
    count = tarinfo.size
    while count > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header
    member = self.next()
    if member is None:
        # Truncated archive: there is no header the long name could
        # belong to. Report "no more members" like next() does.
        return None

    # The member starts where the longname/longlink header started.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001752
def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

       Builds the list of data and hole sections of the member, stores
       it in tarinfo.sparse, replaces tarinfo.size by the original
       size read from the header, appends tarinfo to self.members and
       returns it.
    """
    buf = tarinfo.tobuf()
    sp = _ringbuffer()
    pos = 386           # byte position of the first sparse struct
    lastpos = 0L        # end of the previous section in the original file
    realpos = 0L        # running total of the data bytes seen so far
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        # Each struct is 24 bytes: two 12-byte octal number fields,
        # offset and numbytes. An unparsable field ends the list.
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            break
        # A gap between the previous section and this one is a hole.
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extra header holds up to 21 structs of the same layout,
        # followed by its own isextended flag at byte 504.
        for i in xrange(21):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # A trailing hole up to the original file size.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # tarinfo.size still is the size stored in the archive here; it is
    # used to skip the data blocks before being replaced by origsize.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    self.members.append(tarinfo)
    return tarinfo
1810
# The type mapping for the next() method. The keys are single character
# strings, the typeflag. The values are methods which are called when
# next() encounters such a typeflag.
# The values are stored as plain functions; next() calls them with the
# TarFile object passed explicitly as first argument.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE:   proc_sparse
}
1819
1820 #--------------------------------------------------------------------------
1821 # Little helper methods:
1822
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    remainder = count % BLOCKSIZE
    if not remainder:
        # already a multiple of BLOCKSIZE
        return count
    return count + (BLOCKSIZE - remainder)
1831
def _getmember(self, name, tarinfo=None):
    """Search the members from bottom to top for one called `name'
       and return it, or None if there is no such member.
       If tarinfo is given, the search starts right before it.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        candidates = members
    else:
        candidates = members[:members.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001847
def _load(self):
    """Scan the whole archive by calling next() until it returns
       None, then mark the TarFile as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1857
def _check(self, mode=None):
    """Raise IOError if the TarFile has been closed already or if,
       with `mode' given, the TarFile's own mode is not one of the
       modes in `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
1866
def __iter__(self):
    """Return an iterator over the members: a plain list iterator if
       all members are loaded already, a TarIter object otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1874
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       The member is an extended header whose size field holds the
       length of the null terminated name, followed by the name
       itself, padded with NULs up to a full block.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # Extended header.
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # Name data, padded to a multiple of BLOCKSIZE.
    self.fileobj.write(payload)
    padding = -header.size % BLOCKSIZE
    if padding:
        self.fileobj.write(NUL * padding)
    self.offset += header.size + padding
1899
def _dbg(self, level, msg):
    """Print msg to sys.stderr if level does not exceed the
       TarFile's debug setting.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1905# class TarFile
1906
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile to iterate over and start at
           member index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """A TarIter is its own iterator.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not fully
           loaded the member comes from TarFile.next(); once that
           returns nothing the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Therefore
        # fall back to the member list once the archive is loaded.
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
1942
1943# Helper classes for sparse file support
1944class _section:
1945 """Base class for _data and _hole.
1946 """
1947 def __init__(self, offset, size):
1948 self.offset = offset
1949 self.size = size
1950 def __contains__(self, offset):
1951 return self.offset <= offset < self.offset + self.size
1952
class _data(_section):
    """A section of a sparse file that holds data; realpos is kept
       alongside the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1959
class _hole(_section):
    """A section of a sparse file that is a hole. Adds nothing
       to _section; the class itself serves as the marker.
    """
1964
1965class _ringbuffer(list):
1966 """Ringbuffer class which increases performance
1967 over a regular list.
1968 """
1969 def __init__(self):
1970 self.idx = 0
1971 def find(self, offset):
1972 idx = self.idx
1973 while True:
1974 item = self[idx]
1975 if offset in item:
1976 break
1977 idx += 1
1978 if idx == len(self):
1979 idx = 0
1980 if idx == self.idx:
1981 # End of File
1982 return None
1983 self.idx = idx
1984 return item
1985
1986#---------------------------------------------
1987# zipfile compatible TarFile class
1988#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open the archive file in the given mode.

           compression selects the opener: TAR_PLAIN uses
           TarFile.taropen(), TAR_GZIPPED uses TarFile.gzopen().
           Any other value raises ValueError.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names zipfile uses.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return a list with the names of the members reported
           by infolist().
        """
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return a list of the members whose type is one of
           REGULAR_TYPES.
        """
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None; present for interface
           compatibility only.
        """
        return

    def getinfo(self, name):
        """Return the member called name via TarFile.getmember().
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive under arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member with the contents bytes to the archive.
           zinfo must provide the attributes filename, file_size
           and date_time; they are copied to name, size and mtime
           before zinfo is passed to TarFile.addfile().
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2037
2038#--------------------
2039# exported functions
2040#--------------------
def is_tarfile(name):
    """Try to open name with this module's open() and close it
       again. Return True on success and False if that raises
       TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2051
# Module-level alias for TarFile.open.  Note that this rebinds the name
# `open' in this module's namespace, so code in this module that calls
# open() gets TarFile.open and not the builtin.
open = TarFile.open