blob: 60259bc4d0e1d1f9a65360cc691b3aeda5c2006a [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
if sys.platform == "mac":
    # Classic MacOS (OS 9) is not supported: many places in this module
    # assume that swapping "/" for the local os.path.sep is enough to
    # convert a pathname, which does not hold for the mac
    # rooted:path:name versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1    # maximum size of a file (11 octal digits)

# Member type flags as stored in the header's one-character type field.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
111S_IFLNK = 0120000 # symbolic link
112S_IFREG = 0100000 # regular file
113S_IFBLK = 0060000 # block device
114S_IFDIR = 0040000 # directory
115S_IFCHR = 0020000 # character device
116S_IFIFO = 0010000 # fifo
117
118TSUID = 04000 # set UID on execution
119TSGID = 02000 # set GID on execution
120TSVTX = 01000 # reserved
121
122TUREAD = 0400 # read by owner
123TUWRITE = 0200 # write by owner
124TUEXEC = 0100 # execute/search by owner
125TGREAD = 0040 # read by group
126TGWRITE = 0020 # write by group
127TGEXEC = 0010 # execute/search by group
128TOREAD = 0004 # read by other
129TOWRITE = 0002 # write by other
130TOEXEC = 0001 # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    The result is everything in front of the first NUL character. A
    buffer that holds no NUL at all is returned unchanged.
    """
    # A fixed-width field is only meaningful up to its terminator, so cut
    # at the first NUL instead of merely stripping trailing NULs (which
    # would keep any bytes following an embedded terminator).
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Calculate the checksum for a member's header.

    The checksum is the plain sum of all byte values in buf, where the
    eight bytes of the chksum field itself (offsets 148 to 155) are
    counted as if they were blanks. buf is the 512 byte long string
    buffer which holds the header.
    """
    # The chksum field counts as 8 blanks: 8 * ord(" ") == 256.
    blanks = 256
    before_field = sum(map(ord, buf[:148]))
    after_field = sum(map(ord, buf[156:]))
    return blanks + before_field + after_field
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None the entire remaining content of src is copied.
    Otherwise exactly length bytes are transferred in pieces of at most
    16 KiB, and IOError is raised if src runs out of data first.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    def transfer(count):
        # Move exactly `count` bytes or fail; a short read means that src
        # ended prematurely.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    while full_chunks > 0:
        transfer(chunk_size)
        full_chunks -= 1

    if tail != 0:
        transfer(tail)
    return
175
# One entry per character of the mode string. Each entry lists
# (bit mask, character) candidates in order of priority; the first
# candidate whose mask is completely set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        symbol = "-"            # used when no candidate matches
        for mask, char in candidates:
            if mode & mask == mask:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000217
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
222
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
238
239#---------------------------
240# internal stream interface
241#---------------------------
242class _LowLevelFile:
243 """Low-level file object. Supports reading and writing.
244 It is used instead of a regular file object for streaming
245 access.
246 """
247
248 def __init__(self, name, mode):
249 mode = {
250 "r": os.O_RDONLY,
251 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
252 }[mode]
253 if hasattr(os, "O_BINARY"):
254 mode |= os.O_BINARY
255 self.fd = os.open(name, mode)
256
257 def close(self):
258 os.close(self.fd)
259
260 def read(self, size):
261 return os.read(self.fd, size)
262
263 def write(self, s):
264 os.write(self.fd, s)
265
266class _Stream:
267 """Class that serves as an adapter between TarFile and
268 a stream-like object. The stream-like object only
269 needs to have a read() or write() method and is accessed
270 blockwise. Use of gzip or bzip2 compression is possible.
271 A stream-like object could be for example: sys.stdin,
272 sys.stdout, a socket, a tape device etc.
273
274 _Stream is intended to be used only internally.
275 """
276
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000277 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000278 """Construct a _Stream object.
279 """
280 self._extfileobj = True
281 if fileobj is None:
282 fileobj = _LowLevelFile(name, mode)
283 self._extfileobj = False
284
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000285 if comptype == '*':
286 # Enable transparent compression detection for the
287 # stream interface
288 fileobj = _StreamProxy(fileobj)
289 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000290
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000291 self.name = name or ""
292 self.mode = mode
293 self.comptype = comptype
294 self.fileobj = fileobj
295 self.bufsize = bufsize
296 self.buf = ""
297 self.pos = 0L
298 self.closed = False
299
300 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000301 try:
302 import zlib
303 except ImportError:
304 raise CompressionError, "zlib module is not available"
305 self.zlib = zlib
306 self.crc = zlib.crc32("")
307 if mode == "r":
308 self._init_read_gz()
309 else:
310 self._init_write_gz()
311
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000312 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000313 try:
314 import bz2
315 except ImportError:
316 raise CompressionError, "bz2 module is not available"
317 if mode == "r":
318 self.dbuf = ""
319 self.cmp = bz2.BZ2Decompressor()
320 else:
321 self.cmp = bz2.BZ2Compressor()
322
323 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000324 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325 self.close()
326
327 def _init_write_gz(self):
328 """Initialize for writing with gzip compression.
329 """
330 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
331 -self.zlib.MAX_WBITS,
332 self.zlib.DEF_MEM_LEVEL,
333 0)
334 timestamp = struct.pack("<L", long(time.time()))
335 self.__write("\037\213\010\010%s\002\377" % timestamp)
336 if self.name.endswith(".gz"):
337 self.name = self.name[:-3]
338 self.__write(self.name + NUL)
339
340 def write(self, s):
341 """Write string s to the stream.
342 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000343 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000344 self.crc = self.zlib.crc32(s, self.crc)
345 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000346 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000347 s = self.cmp.compress(s)
348 self.__write(s)
349
350 def __write(self, s):
351 """Write string s to the stream if a whole new block
352 is ready to be written.
353 """
354 self.buf += s
355 while len(self.buf) > self.bufsize:
356 self.fileobj.write(self.buf[:self.bufsize])
357 self.buf = self.buf[self.bufsize:]
358
359 def close(self):
360 """Close the _Stream object. No operation should be
361 done on it afterwards.
362 """
363 if self.closed:
364 return
365
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000366 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000367 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000368
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000369 if self.mode == "w" and self.buf:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000370 blocks, remainder = divmod(len(self.buf), self.bufsize)
371 if remainder > 0:
372 self.buf += NUL * (self.bufsize - remainder)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000373 self.fileobj.write(self.buf)
374 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000375 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000377 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000378
379 if not self._extfileobj:
380 self.fileobj.close()
381
382 self.closed = True
383
384 def _init_read_gz(self):
385 """Initialize for reading a gzip compressed fileobj.
386 """
387 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
388 self.dbuf = ""
389
390 # taken from gzip.GzipFile with some alterations
391 if self.__read(2) != "\037\213":
392 raise ReadError, "not a gzip file"
393 if self.__read(1) != "\010":
394 raise CompressionError, "unsupported compression method"
395
396 flag = ord(self.__read(1))
397 self.__read(6)
398
399 if flag & 4:
400 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
401 self.read(xlen)
402 if flag & 8:
403 while True:
404 s = self.__read(1)
405 if not s or s == NUL:
406 break
407 if flag & 16:
408 while True:
409 s = self.__read(1)
410 if not s or s == NUL:
411 break
412 if flag & 2:
413 self.__read(2)
414
415 def tell(self):
416 """Return the stream's file pointer position.
417 """
418 return self.pos
419
420 def seek(self, pos=0):
421 """Set the stream's file pointer to pos. Negative seeking
422 is forbidden.
423 """
424 if pos - self.pos >= 0:
425 blocks, remainder = divmod(pos - self.pos, self.bufsize)
426 for i in xrange(blocks):
427 self.read(self.bufsize)
428 self.read(remainder)
429 else:
430 raise StreamError, "seeking backwards is not allowed"
431 return self.pos
432
433 def read(self, size=None):
434 """Return the next size number of bytes from the stream.
435 If size is not defined, return all bytes of the stream
436 up to EOF.
437 """
438 if size is None:
439 t = []
440 while True:
441 buf = self._read(self.bufsize)
442 if not buf:
443 break
444 t.append(buf)
445 buf = "".join(t)
446 else:
447 buf = self._read(size)
448 self.pos += len(buf)
449 return buf
450
451 def _read(self, size):
452 """Return size bytes from the stream.
453 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000454 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000455 return self.__read(size)
456
457 c = len(self.dbuf)
458 t = [self.dbuf]
459 while c < size:
460 buf = self.__read(self.bufsize)
461 if not buf:
462 break
463 buf = self.cmp.decompress(buf)
464 t.append(buf)
465 c += len(buf)
466 t = "".join(t)
467 self.dbuf = t[size:]
468 return t[:size]
469
470 def __read(self, size):
471 """Return size bytes from stream. If internal buffer is empty,
472 read another block from the stream.
473 """
474 c = len(self.buf)
475 t = [self.buf]
476 while c < size:
477 buf = self.fileobj.read(self.bufsize)
478 if not buf:
479 break
480 t.append(buf)
481 c += len(buf)
482 t = "".join(t)
483 self.buf = t[size:]
484 return t[:size]
485# class _Stream
486
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000487class _StreamProxy(object):
488 """Small proxy class that enables transparent compression
489 detection for the Stream interface (mode 'r|*').
490 """
491
492 def __init__(self, fileobj):
493 self.fileobj = fileobj
494 self.buf = self.fileobj.read(BLOCKSIZE)
495
496 def read(self, size):
497 self.read = self.fileobj.read
498 return self.buf
499
500 def getcomptype(self):
501 if self.buf.startswith("\037\213\010"):
502 return "gz"
503 if self.buf.startswith("BZh91"):
504 return "bz2"
505 return "tar"
506
507 def close(self):
508 self.fileobj.close()
509# class StreamProxy
510
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000511#------------------------
512# Extraction file object
513#------------------------
514class ExFileObject(object):
515 """File-like object for reading an archive member.
516 Is returned by TarFile.extractfile(). Support for
517 sparse files included.
518 """
519
520 def __init__(self, tarfile, tarinfo):
521 self.fileobj = tarfile.fileobj
522 self.name = tarinfo.name
523 self.mode = "r"
524 self.closed = False
525 self.offset = tarinfo.offset_data
526 self.size = tarinfo.size
527 self.pos = 0L
528 self.linebuffer = ""
529 if tarinfo.issparse():
530 self.sparse = tarinfo.sparse
531 self.read = self._readsparse
532 else:
533 self.read = self._readnormal
534
535 def __read(self, size):
536 """Overloadable read method.
537 """
538 return self.fileobj.read(size)
539
540 def readline(self, size=-1):
541 """Read a line with approx. size. If size is negative,
542 read a whole line. readline() and read() must not
543 be mixed up (!).
544 """
545 if size < 0:
546 size = sys.maxint
547
548 nl = self.linebuffer.find("\n")
549 if nl >= 0:
550 nl = min(nl, size)
551 else:
552 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000553 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000554 buf = self.read(min(size, 100))
555 if not buf:
556 break
557 self.linebuffer += buf
558 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000559 nl = self.linebuffer.find("\n")
560 if nl == -1:
561 s = self.linebuffer
562 self.linebuffer = ""
563 return s
564 buf = self.linebuffer[:nl]
565 self.linebuffer = self.linebuffer[nl + 1:]
566 while buf[-1:] == "\r":
567 buf = buf[:-1]
568 return buf + "\n"
569
570 def readlines(self):
571 """Return a list with all (following) lines.
572 """
573 result = []
574 while True:
575 line = self.readline()
576 if not line: break
577 result.append(line)
578 return result
579
580 def _readnormal(self, size=None):
581 """Read operation for regular files.
582 """
583 if self.closed:
584 raise ValueError, "file is closed"
585 self.fileobj.seek(self.offset + self.pos)
586 bytesleft = self.size - self.pos
587 if size is None:
588 bytestoread = bytesleft
589 else:
590 bytestoread = min(size, bytesleft)
591 self.pos += bytestoread
592 return self.__read(bytestoread)
593
594 def _readsparse(self, size=None):
595 """Read operation for sparse files.
596 """
597 if self.closed:
598 raise ValueError, "file is closed"
599
600 if size is None:
601 size = self.size - self.pos
602
603 data = []
604 while size > 0:
605 buf = self._readsparsesection(size)
606 if not buf:
607 break
608 size -= len(buf)
609 data.append(buf)
610 return "".join(data)
611
612 def _readsparsesection(self, size):
613 """Read a single section of a sparse file.
614 """
615 section = self.sparse.find(self.pos)
616
617 if section is None:
618 return ""
619
620 toread = min(size, section.offset + section.size - self.pos)
621 if isinstance(section, _data):
622 realpos = section.realpos + self.pos - section.offset
623 self.pos += toread
624 self.fileobj.seek(self.offset + realpos)
625 return self.__read(toread)
626 else:
627 self.pos += toread
628 return NUL * toread
629
630 def tell(self):
631 """Return the current file position.
632 """
633 return self.pos
634
635 def seek(self, pos, whence=0):
636 """Seek to a position in the file.
637 """
638 self.linebuffer = ""
639 if whence == 0:
640 self.pos = min(max(pos, 0), self.size)
641 if whence == 1:
642 if pos < 0:
643 self.pos = max(self.pos + pos, 0)
644 else:
645 self.pos = min(self.pos + pos, self.size)
646 if whence == 2:
647 self.pos = max(min(self.size + pos, self.size), 0)
648
649 def close(self):
650 """Close the file object.
651 """
652 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000653
654 def __iter__(self):
655 """Get an iterator over the file object.
656 """
657 if self.closed:
658 raise ValueError("I/O operation on closed file")
659 return self
660
661 def next(self):
662 """Get the next item from the file iterator.
663 """
664 result = self.readline()
665 if not result:
666 raise StopIteration
667 return result
Tim Peterseba28be2005-03-28 01:08:02 +0000668
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000669#class ExFileObject
670
671#------------------
672# Exported Classes
673#------------------
674class TarInfo(object):
675 """Informational class which holds the details about an
676 archive member given by a tar header block.
677 TarInfo objects are returned by TarFile.getmember(),
678 TarFile.getmembers() and TarFile.gettarinfo() and are
679 usually created internally.
680 """
681
682 def __init__(self, name=""):
683 """Construct a TarInfo object. name is the optional name
684 of the member.
685 """
686
687 self.name = name # member name (dirnames must end with '/')
688 self.mode = 0666 # file permissions
689 self.uid = 0 # user id
690 self.gid = 0 # group id
691 self.size = 0 # file size
692 self.mtime = 0 # modification time
693 self.chksum = 0 # header checksum
694 self.type = REGTYPE # member type
695 self.linkname = "" # link name
696 self.uname = "user" # user name
697 self.gname = "group" # group name
698 self.devmajor = 0 #-
699 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
700 self.prefix = "" # prefix to filename or holding information
701 # about sparse files
702
703 self.offset = 0 # the tar header starts here
704 self.offset_data = 0 # the file's data starts here
705
706 def __repr__(self):
707 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
708
Guido van Rossum75b64e62005-01-16 00:16:11 +0000709 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710 def frombuf(cls, buf):
711 """Construct a TarInfo object from a 512 byte string buffer.
712 """
713 tarinfo = cls()
Neal Norwitzd96d1012004-07-20 22:23:02 +0000714 tarinfo.name = nts(buf[0:100])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000715 tarinfo.mode = int(buf[100:108], 8)
716 tarinfo.uid = int(buf[108:116],8)
717 tarinfo.gid = int(buf[116:124],8)
Neal Norwitzd96d1012004-07-20 22:23:02 +0000718
719 # There are two possible codings for the size field we
720 # have to discriminate, see comment in tobuf() below.
721 if buf[124] != chr(0200):
722 tarinfo.size = long(buf[124:136], 8)
723 else:
724 tarinfo.size = 0L
725 for i in range(11):
726 tarinfo.size <<= 8
727 tarinfo.size += ord(buf[125 + i])
728
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000729 tarinfo.mtime = long(buf[136:148], 8)
730 tarinfo.chksum = int(buf[148:156], 8)
731 tarinfo.type = buf[156:157]
732 tarinfo.linkname = nts(buf[157:257])
733 tarinfo.uname = nts(buf[265:297])
734 tarinfo.gname = nts(buf[297:329])
735 try:
736 tarinfo.devmajor = int(buf[329:337], 8)
737 tarinfo.devminor = int(buf[337:345], 8)
738 except ValueError:
739 tarinfo.devmajor = tarinfo.devmajor = 0
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000740 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000741
742 # The prefix field is used for filenames > 100 in
743 # the POSIX standard.
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000744 # name = prefix + '/' + name
745 if tarinfo.type != GNUTYPE_SPARSE:
746 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000747
748 # Directory names should have a '/' at the end.
749 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
750 tarinfo.name += "/"
751 return tarinfo
752
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000753 def tobuf(self):
754 """Return a tar header block as a 512 byte string.
755 """
Neal Norwitzd96d1012004-07-20 22:23:02 +0000756 # Prefer the size to be encoded as 11 octal ascii digits
757 # which is the most portable. If the size exceeds this
758 # limit (>= 8 GB), encode it as an 88-bit value which is
759 # a GNU tar feature.
760 if self.size <= MAXSIZE_MEMBER:
761 size = "%011o" % self.size
762 else:
763 s = self.size
764 size = ""
765 for i in range(11):
766 size = chr(s & 0377) + size
767 s >>= 8
768 size = chr(0200) + size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000769
770 # The following code was contributed by Detlef Lannert.
771 parts = []
772 for value, fieldsize in (
Neal Norwitzd96d1012004-07-20 22:23:02 +0000773 (self.name, 100),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000774 ("%07o" % (self.mode & 07777), 8),
775 ("%07o" % self.uid, 8),
776 ("%07o" % self.gid, 8),
Neal Norwitzd96d1012004-07-20 22:23:02 +0000777 (size, 12),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000778 ("%011o" % self.mtime, 12),
779 (" ", 8),
780 (self.type, 1),
781 (self.linkname, 100),
782 (MAGIC, 6),
783 (VERSION, 2),
784 (self.uname, 32),
785 (self.gname, 32),
786 ("%07o" % self.devmajor, 8),
787 ("%07o" % self.devminor, 8),
788 (self.prefix, 155)
789 ):
790 l = len(value)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000791 parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000792
793 buf = "".join(parts)
794 chksum = calc_chksum(buf)
795 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
796 buf += (BLOCKSIZE - len(buf)) * NUL
797 self.buf = buf
798 return buf
799
800 def isreg(self):
801 return self.type in REGULAR_TYPES
802 def isfile(self):
803 return self.isreg()
804 def isdir(self):
805 return self.type == DIRTYPE
806 def issym(self):
807 return self.type == SYMTYPE
808 def islnk(self):
809 return self.type == LNKTYPE
810 def ischr(self):
811 return self.type == CHRTYPE
812 def isblk(self):
813 return self.type == BLKTYPE
814 def isfifo(self):
815 return self.type == FIFOTYPE
816 def issparse(self):
817 return self.type == GNUTYPE_SPARSE
818 def isdev(self):
819 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
820# class TarInfo
821
822class TarFile(object):
823 """The TarFile Class provides an interface to tar archives.
824 """
825
826 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
827
828 dereference = False # If true, add content of linked file to the
829 # tar file, else the link.
830
831 ignore_zeros = False # If true, skips empty or invalid blocks and
832 # continues processing.
833
834 errorlevel = 0 # If 0, fatal errors only appear in debug
835 # messages (if debug >= 0). If > 0, errors
836 # are passed to the caller as exceptions.
837
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000838 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000839 # archives (no GNU extensions!)
840
841 fileobject = ExFileObject
842
843 def __init__(self, name=None, mode="r", fileobj=None):
844 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
845 read from an existing archive, 'a' to append data to an existing
846 file or 'w' to create a new file overwriting an existing one. `mode'
847 defaults to 'r'.
848 If `fileobj' is given, it is used for reading or writing data. If it
849 can be determined, `mode' is overridden by `fileobj's mode.
850 `fileobj' is not closed, when TarFile is closed.
851 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000852 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000853
854 if len(mode) > 1 or mode not in "raw":
855 raise ValueError, "mode must be 'r', 'a' or 'w'"
856 self._mode = mode
857 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
858
859 if not fileobj:
860 fileobj = file(self.name, self.mode)
861 self._extfileobj = False
862 else:
863 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000864 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000865 if hasattr(fileobj, "mode"):
866 self.mode = fileobj.mode
867 self._extfileobj = True
868 self.fileobj = fileobj
869
870 # Init datastructures
871 self.closed = False
872 self.members = [] # list of members as TarInfo objects
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 self._loaded = False # flag if all members have been read
874 self.offset = 0L # current position in the archive file
875 self.inodes = {} # dictionary caching the inodes of
876 # archive members already added
877
878 if self._mode == "r":
879 self.firstmember = None
880 self.firstmember = self.next()
881
882 if self._mode == "a":
883 # Move to the end of the archive,
884 # before the first empty block.
885 self.firstmember = None
886 while True:
887 try:
888 tarinfo = self.next()
889 except ReadError:
890 self.fileobj.seek(0)
891 break
892 if tarinfo is None:
893 self.fileobj.seek(- BLOCKSIZE, 1)
894 break
895
896 if self._mode in "aw":
897 self._loaded = True
898
899 #--------------------------------------------------------------------------
900 # Below are the classmethods which act as alternate constructors to the
901 # TarFile class. The open() method is the only one that is needed for
902 # public use; it is the "super"-constructor and is able to select an
903 # adequate "sub"-constructor for a particular compression using the mapping
904 # from OPEN_METH.
905 #
906 # This concept allows one to subclass TarFile without losing the comfort of
907 # the super-constructor. A sub-constructor is registered and made available
908 # by adding it to the mapping in OPEN_METH.
909
Guido van Rossum75b64e62005-01-16 00:16:11 +0000910 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000911 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
912 """Open a tar archive for reading, writing or appending. Return
913 an appropriate TarFile class.
914
915 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000916 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000917 'r:' open for reading exclusively uncompressed
918 'r:gz' open for reading with gzip compression
919 'r:bz2' open for reading with bzip2 compression
920 'a' or 'a:' open for appending
921 'w' or 'w:' open for writing without compression
922 'w:gz' open for writing with gzip compression
923 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000924
925 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000926 'r|' open an uncompressed stream of tar blocks for reading
927 'r|gz' open a gzip compressed stream of tar blocks
928 'r|bz2' open a bzip2 compressed stream of tar blocks
929 'w|' open an uncompressed stream for writing
930 'w|gz' open a gzip compressed stream for writing
931 'w|bz2' open a bzip2 compressed stream for writing
932 """
933
934 if not name and not fileobj:
935 raise ValueError, "nothing to open"
936
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000937 if mode in ("r", "r:*"):
938 # Find out which *open() is appropriate for opening the file.
939 for comptype in cls.OPEN_METH:
940 func = getattr(cls, cls.OPEN_METH[comptype])
941 try:
942 return func(name, "r", fileobj)
943 except (ReadError, CompressionError):
944 continue
945 raise ReadError, "file could not be opened successfully"
946
947 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000948 filemode, comptype = mode.split(":", 1)
949 filemode = filemode or "r"
950 comptype = comptype or "tar"
951
952 # Select the *open() function according to
953 # given compression.
954 if comptype in cls.OPEN_METH:
955 func = getattr(cls, cls.OPEN_METH[comptype])
956 else:
957 raise CompressionError, "unknown compression type %r" % comptype
958 return func(name, filemode, fileobj)
959
960 elif "|" in mode:
961 filemode, comptype = mode.split("|", 1)
962 filemode = filemode or "r"
963 comptype = comptype or "tar"
964
965 if filemode not in "rw":
966 raise ValueError, "mode must be 'r' or 'w'"
967
968 t = cls(name, filemode,
969 _Stream(name, filemode, comptype, fileobj, bufsize))
970 t._extfileobj = False
971 return t
972
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000973 elif mode in "aw":
974 return cls.taropen(name, mode, fileobj)
975
976 raise ValueError, "undiscernible mode"
977
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open the uncompressed tar archive `name' for reading or writing.

    `mode' must be exactly 'r', 'a' or 'w'; any other value raises
    ValueError. `name', `mode' and `fileobj' are handed to the class
    constructor unchanged and the new object is returned.
    """
    # Compare against whole mode strings. A substring test such as
    # `mode not in "raw"' also lets the empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
985
Guido van Rossum75b64e62005-01-16 00:16:11 +0000986 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000987 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
988 """Open gzip compressed tar archive name for reading or writing.
989 Appending is not allowed.
990 """
991 if len(mode) > 1 or mode not in "rw":
992 raise ValueError, "mode must be 'r' or 'w'"
993
994 try:
995 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000996 gzip.GzipFile
997 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000998 raise CompressionError, "gzip module is not available"
999
1000 pre, ext = os.path.splitext(name)
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001001 pre = os.path.basename(pre)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001002 if ext == ".tgz":
1003 ext = ".tar"
1004 if ext == ".gz":
1005 ext = ""
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001006 tarname = pre + ext
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001007
1008 if fileobj is None:
1009 fileobj = file(name, mode + "b")
1010
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001011 if mode != "r":
1012 name = tarname
1013
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001014 try:
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001015 t = cls.taropen(tarname, mode,
1016 gzip.GzipFile(name, mode, compresslevel, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001017 )
1018 except IOError:
1019 raise ReadError, "not a gzip file"
1020 t._extfileobj = False
1021 return t
1022
Guido van Rossum75b64e62005-01-16 00:16:11 +00001023 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001024 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
1025 """Open bzip2 compressed tar archive name for reading or writing.
1026 Appending is not allowed.
1027 """
1028 if len(mode) > 1 or mode not in "rw":
1029 raise ValueError, "mode must be 'r' or 'w'."
1030
1031 try:
1032 import bz2
1033 except ImportError:
1034 raise CompressionError, "bz2 module is not available"
1035
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001036 pre, ext = os.path.splitext(name)
1037 pre = os.path.basename(pre)
1038 if ext == ".tbz2":
1039 ext = ".tar"
1040 if ext == ".bz2":
1041 ext = ""
1042 tarname = pre + ext
1043
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001044 if fileobj is not None:
1045 raise ValueError, "no support for external file objects"
1046
1047 try:
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001048 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001049 except IOError:
1050 raise ReadError, "not a bzip2 file"
1051 t._extfileobj = False
1052 return t
1053
# All *open() methods are registered here.
# Maps a compression type name (the part after ":" in a mode string) to
# the name of the classmethod that opens archives of that type. The
# methods are looked up with getattr() on the class.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1060
1061 #--------------------------------------------------------------------------
1062 # The public methods which TarFile provides:
1063
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
    appended to the archive and the archive is padded with zeros up to
    a multiple of RECORDSIZE. Closing an already closed TarFile does
    nothing.
    """
    if not self.closed:
        if self._mode in "aw":
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))

        # Only close file objects that are owned by this TarFile.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1083
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
    found in the archive, KeyError is raised. If a member occurs more
    than once in the archive, its last occurrence is assumed to be the
    most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001094
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
    list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires one scan over the whole archive.
    self._load()
    return self.members
1104
def getnames(self):
    """Return the members of the archive as a list of their names. It has
    the same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001110
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
    object `fileobj' (using os.fstat on its file descriptor). You can
    modify some of the TarInfo's attributes before you add it using
    addfile(). If given, `arcname' specifies an alternative name for the
    file in the archive.

    Returns None if the file is of a type that cannot be stored
    (anything that is not a regular file, directory, fifo, symbolic
    link, character device or block device).
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.fstat for file objects; otherwise os.lstat or os.stat,
    # depending on platform and if symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and inode in self.inodes \
           and arcname != self.inodes[inode]:
            # Is it a hardlink to an already archived file? A file that
            # is added again under the very same archive name is not:
            # it would become a link pointing at itself.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only members stored as regular files are followed by data blocks;
    # a hard link header must not announce any data.
    if ftype == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1209
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
    the names of the members are printed. If it is True, an `ls -l'-like
    output is produced.
    """
    self._check()

    for tarinfo in self:
        # Collect the columns of one line, then emit them separated by
        # single spaces and terminated by a newline.
        fields = []
        if verbose:
            fields.append(filemode(tarinfo.mode))
            fields.append("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                     tarinfo.gname or tarinfo.gid))
            if tarinfo.ischr() or tarinfo.isblk():
                fields.append("%10s" % ("%d,%d"
                              % (tarinfo.devmajor, tarinfo.devminor)))
            else:
                fields.append("%10d" % tarinfo.size)
            fields.append("%d-%02d-%02d %02d:%02d:%02d"
                          % time.localtime(tarinfo.mtime)[:6])

        fields.append(tarinfo.name)

        if verbose:
            if tarinfo.issym():
                fields.append("->")
                fields.append(tarinfo.linkname)
            if tarinfo.islnk():
                fields.append("link to")
                fields.append(tarinfo.linkname)
        sys.stdout.write(" ".join(fields) + "\n")
1238
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
    (directory, fifo, symbolic link, etc.). If given, `arcname'
    specifies an alternative name for the file in the archive.
    Directories are added recursively by default. This can be avoided by
    setting `recursive' to False.

    The archive file itself and files of unsupported types are skipped
    with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing the member fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1290
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
    given, tarinfo.size bytes are read from it and added to the archive.
    You can create TarInfo objects using gettarinfo().
    On Windows platforms, `fileobj' should always be opened with mode
    'rb' to avoid irritation about the file size.

    Raises ValueError in posix mode if the file is too large or a name
    or link name does not fit into the header.
    """
    import copy

    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        else:
            self._dbg(2, "tarfile: Created GNU tar largefile header")

    # Names that do not fit are shortened or split for the header block
    # only. Do that on a copy, so that the caller's object and the entry
    # kept in self.members retain the complete names.
    header = copy.copy(tarinfo)

    if len(header.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)"
                             % (LENGTH_LINK))
        else:
            self._create_gnulong(header.linkname, GNUTYPE_LONGLINK)
            header.linkname = header.linkname[:LENGTH_LINK - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(header.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at a "/" into prefix and name fields.
            prefix = header.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            name = header.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)"
                                 % (LENGTH_NAME))

            header.name = name
            header.prefix = prefix
        else:
            self._create_gnulong(header.name, GNUTYPE_LONGNAME)
            header.name = header.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            # Pad the last data block with zeros.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001357
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
    directory and set owner, modification time and permissions on
    directories afterwards. `path' specifies a different directory
    to extract to. `members' is optional and must be a subset of the
    list returned by getmembers().
    """
    directories = []

    if members is None:
        members = self

    # rwx for user, group and others (octal 777).
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable: `path' must stay the extraction root
        # for every directory handled in this loop.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1398
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a TarInfo object. You can
    specify a different directory using `path'.

    Whether errors are raised or only reported through _dbg() depends
    on self.errorlevel: EnvironmentError is re-raised above level 0,
    ExtractError above level 1.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination)
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1431
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
    a filename or a TarInfo object. If `member' is a regular file, a
    file-like object is returned. If `member' is a link, a file-like
    object is constructed from the link's target. If `member' is none of
    the above, None is returned.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()

    Raises StreamError when a (sym)link is requested from an archive
    that is read from a _Stream.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type (which are treated
    # as regular files), are served directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1470
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
    file called targetpath.

    Missing parent directories are created first on a best-effort
    basis. Afterwards owner, permissions and modification time are
    applied (permissions and time are skipped for symbolic links).
    """
    # Build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        # rwx for user, group and others (octal 777).
        ti.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        try:
            self._extract_member(ti, ti.name)
        except (EnvironmentError, ExtractError):
            # Best effort only: if the directory really is missing, the
            # extraction below fails with a meaningful error. Anything
            # else (e.g. KeyboardInterrupt) must not be swallowed.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1523
1524 #--------------------------------------------------------------------------
1525 # Below are the different file methods. They are called via
1526 # _extract_member() when extract() is called. They can be replaced in a
1527 # subclass to implement other functionality.
1528
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath. A directory that already
    exists is not treated as an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only "already exists" is tolerated; everything else propagates.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1537
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
    member described by tarinfo.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            # Close the target even if copying fails halfway.
            target.close()
    finally:
        # Close the source even if the target could not be opened.
        source.close()
1546
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
    at targetpath. The member is extracted like a regular file and a
    debug message is emitted.
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, " \
           "extracted as regular file." % tarinfo.type
    self._dbg(1, note)
1554
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. Raises ExtractError if the
    platform has no os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1562
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath. Raises
    ExtractError if the platform lacks os.mknod or os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        mode = tarinfo.mode | stat.S_IFBLK
    else:
        mode = tarinfo.mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1577
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
    (platform limitation), we try to make a copy of the referenced file
    instead of a link. Raises IOError if that fails as well.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Fall back to extracting or copying the referenced file.
        if tarinfo.issym():
            # A symlink target is relative to the link's directory.
            source = os.path.join(os.path.dirname(tarinfo.name), source)
            source = normpath(source)

        try:
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            source = os.path.normpath(source)
            try:
                shutil.copy2(source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1604
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Does nothing unless
    the pwd module is available and the process runs with effective
    user id 0. Raises ExtractError if the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name first, then by id, then fall back
    # to the group of the current process.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Same lookup order for the user.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1632
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo. Does
    nothing on platforms without os.chmod. Raises ExtractError if the
    mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001641
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo. The
    access time is set to the same value. Raises ExtractError if the
    time cannot be changed.
    """
    if hasattr(os, 'utime'):
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        if not (sys.platform == "win32" and tarinfo.isdir()):
            try:
                os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
            except EnvironmentError:
                raise ExtractError("could not change modification time")
1655
1656 #--------------------------------------------------------------------------
1657
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    Raises ReadError if the very first block of the archive is not a
    valid header (unless ignore_zeros is set).
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Report the bad block and keep scanning.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid, this does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Test the LAST character of the name (name[-1:]); name[:-1] would
    # compare everything except the last character with "/".
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        # some old tar programs don't know DIRTYPE
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    return tarinfo
1724
1725 #--------------------------------------------------------------------------
1726 # Below are some methods which are called for special typeflags in the
1727 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1728 # are registered in TYPE_METH below. You can register your own methods
1729 # with this mapping.
1730 # A registered method is called with a TarInfo object as only argument.
1731 #
1732 # During its execution the method MUST perform the following tasks:
1733 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1734 # if there is data to follow.
1735 # 2. set self.offset to the position where the next member's header will
1736 # begin.
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001737 # 3. append the tarinfo object to self.members, if it is supposed to appear
1738 # as a member of the TarFile object.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001739 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001740
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
    or longlink member.

    The data blocks following `tarinfo' contain the long name. It is
    applied to the member whose header comes next, and that member is
    returned. Raises ReadError if the archive ends before that header.
    """
    # Collect the blocks holding the name; join once at the end
    # instead of growing a string block by block.
    blocks = []
    count = tarinfo.size
    while count > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(blocks)

    # Fetch the next header
    member = self.next()
    if member is None:
        # Truncated archive: the long name belongs to nothing.
        raise ReadError("missing subsequent header")

    # The member starts where its longname/longlink header starts.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001763
def proc_sparse(self, tarinfo):
    """Analyze a GNU sparse header plus extra headers.

    Builds a _ringbuffer of _hole and _data entries describing the
    layout of the sparse file, stores it in tarinfo.sparse, replaces
    tarinfo.size by the original file size read from the header,
    appends tarinfo to self.members and returns it.
    """
    buf = tarinfo.tobuf()
    sp = _ringbuffer()
    # Position of the first sparse struct inside the header block.
    pos = 386
    # lastpos: end of the last data region seen so far.
    # realpos: running total of numbytes, passed to _data() as third
    # argument (presumably the position of the data inside the
    # archive member -- confirm against _data).
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        # Each struct is 24 bytes: two 12 byte octal numbers.
        try:
            offset = int(buf[pos:pos + 12], 8)
            numbytes = int(buf[pos + 12:pos + 24], 8)
        except ValueError:
            # An unparsable field ends the list of structs.
            break
        if offset > lastpos:
            # The gap before this data region is a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = int(buf[483:495], 8)

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extra header block is scanned for up to 21 structs.
        for i in xrange(21):
            try:
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # Byte 504 of an extra block tells whether another one follows.
        isextended = ord(buf[504])

    # A trailing hole reaches up to the original file size.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    # Skip the data blocks, using the size from the header block,
    # before tarinfo.size is replaced by the original size.
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    self.members.append(tarinfo)
    return tarinfo
1821
# The type mapping for the next() method. The keys are single character
# strings, the typeflag. The values are methods which are called when
# next() encounters such a typeflag. next() calls them as
# method(self, tarinfo) and returns their result.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE:   proc_sparse
}
1830
1831 #--------------------------------------------------------------------------
1832 # Little helper methods:
1833
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
    e.g. _block(834) => 1024.
    """
    full, rest = divmod(count, BLOCKSIZE)
    if not rest:
        return full * BLOCKSIZE
    # A partially filled block still occupies a whole block.
    return (full + 1) * BLOCKSIZE
1842
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.
    If tarinfo is given, it is used as the starting point: only
    members in front of it are searched. Returns None if no member
    has that name.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        candidates = members
    else:
        candidates = members[:members.index(tarinfo)]

    # The last matching member wins.
    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001858
def _load(self):
    """Read through the entire archive file and look for readable
    members. Afterwards the TarFile is marked as completely loaded.
    """
    # next() appends to self.members; just drain it until it is done.
    tarinfo = self.next()
    while tarinfo is not None:
        tarinfo = self.next()
    self._loaded = True
1868
1869 def _check(self, mode=None):
1870 """Check if TarFile is still open, and if the operation's mode
1871 corresponds to TarFile's mode.
1872 """
1873 if self.closed:
1874 raise IOError, "%s is closed" % self.__class__.__name__
1875 if mode is not None and self._mode not in mode:
1876 raise IOError, "bad operation for mode %r" % self._mode
1877
def __iter__(self):
    """Return an iterator over the archive's members.
       While the archive is not yet marked as loaded a TarIter is
       handed out; afterwards the member list is iterated directly.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1885
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       The member is made of an extended tar header whose size field
       holds the length of the null terminated name, followed by the
       data blocks containing that null terminated name.
    """
    name += NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(name)

    # The extended header occupies exactly one block.
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # The name follows, padded with NULs up to the next block boundary.
    self.fileobj.write(name)
    padding = -header.size % BLOCKSIZE
    if padding:
        self.fileobj.write(NUL * padding)
    self.offset += header.size + padding
1910
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level exceeds self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1916# class TarFile
1917
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile to iterate over and start at the
           first member.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """An iterator is its own iterator object.
        """
        return self

    def next(self):
        """Return the next member.
           As long as the TarFile is not marked as loaded, members are
           fetched with its next() method; once that is exhausted the
           TarFile is flagged as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Therefore
        # a loaded archive is served from its member list, using
        # the number of members handed out so far as position.
        if archive._loaded:
            if self.index >= len(archive.members):
                raise StopIteration
            member = archive.members[self.index]
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
1953
1954# Helper classes for sparse file support
1955class _section:
1956 """Base class for _data and _hole.
1957 """
1958 def __init__(self, offset, size):
1959 self.offset = offset
1960 self.size = size
1961 def __contains__(self, offset):
1962 return self.offset <= offset < self.offset + self.size
1963
class _data(_section):
    """A data section of a sparse file. In addition to the offset and
       size kept by _section it records a realpos value.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1970
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section;
       the separate class only tells holes apart from data sections.
    """
1975
1976class _ringbuffer(list):
1977 """Ringbuffer class which increases performance
1978 over a regular list.
1979 """
1980 def __init__(self):
1981 self.idx = 0
1982 def find(self, offset):
1983 idx = self.idx
1984 while True:
1985 item = self[idx]
1986 if offset in item:
1987 break
1988 idx += 1
1989 if idx == len(self):
1990 idx = 0
1991 if idx == self.idx:
1992 # End of File
1993 return None
1994 self.idx = idx
1995 return item
1996
1997#---------------------------------------------
1998# zipfile compatible TarFile class
1999#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED); any other compression
           value raises ValueError.
           When reading, every member additionally gets the zipfile
           style attributes filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to the underlying TarFile's list() method.
        """
        self.tarfile.list()

    def testzip(self):
        """Exist for interface compatibility only; always return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive, stored as arcname.
           compress_type is accepted for signature compatibility and
           ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member whose contents are the string bytes.
           Name and modification time are taken from the filename and
           date_time attributes of zinfo; the size is the length of
           bytes.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # A TarInfo passed in by the caller is reused so that its other
        # attributes are kept. Anything else (presumably a
        # zipfile.ZipInfo -- it only needs filename and date_time) is
        # translated into a fresh TarInfo instead of being handed to
        # addfile() directly.
        if isinstance(zinfo, TarInfo):
            tinfo = zinfo
        else:
            tinfo = TarInfo()
        tinfo.name = zinfo.filename
        # The member size must describe the data actually written;
        # zinfo.file_size may be unset or stale.
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2048
2049#--------------------
2050# exported functions
2051#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # open is looked up when the function is called, so this uses the
    # module level alias bound below, not the builtin open().
    try:
        open(name).close()
    except TarError:
        return False
    return True

# Module level shortcut: tarfile.open() is TarFile.open().
open = TarFile.open