blob: c86248c1a4749f4854a98d3b8c2fa58445971fd4 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
# Refuse to be imported when sys.platform is 'mac': the ImportError below is
# raised at import time, so no other part of the module is defined there.
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
# from tarfile import *
# Names exported by a star-import of this module.
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]

#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20         # length of records (20 blocks = 10240 bytes)
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L      # maximum size of a file (11 octal digits),
                                    # i.e. 8**11 - 1 bytes

# Values of the one-character type field of a header block.
REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits (the first column produced by filemode()).
S_IFLNK = 0120000                   # symbolic link
S_IFREG = 0100000                   # regular file
S_IFBLK = 0060000                   # block device
S_IFDIR = 0040000                   # directory
S_IFCHR = 0020000                   # character device
S_IFIFO = 0010000                   # fifo

TSUID = 04000                       # set UID on execution
TSGID = 02000                       # set GID on execution
TSVTX = 01000                       # reserved (shown as "t"/"T" by filemode())

# Permission bits: read/write/execute for owner, group and other.
TUREAD = 0400                       # read by owner
TUWRITE = 0200                      # write by owner
TUEXEC = 0100                       # execute/search by owner
TGREAD = 0040                       # read by group
TGWRITE = 0020                      # write by group
TGEXEC = 0010                       # execute/search by group
TOREAD = 0004                       # read by other
TOWRITE = 0002                      # write by other
TOEXEC = 0001                       # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    Return the part of `s' that precedes the first NUL character, or
    `s' unchanged if it contains no NUL at all. Header fields are
    NUL-terminated, so whatever follows the terminator is not part of
    the value; merely stripping trailing NULs would keep such leftover
    bytes in the result.
    """
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Return the checksum of a member's header block.

    buf is the 512 byte string holding the header. The checksum is the
    plain sum of all its byte values, except that the eight bytes of the
    chksum field itself (offsets 148..155) are counted as if they were
    spaces.
    """
    blanks = 8 * ord(" ")                           # stand-in for the chksum field
    leading = sum([ord(ch) for ch in buf[:148]])    # bytes before chksum
    trailing = sum([ord(ch) for ch in buf[156:]])   # bytes after chksum
    return blanks + leading + trailing
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None everything up to the end of src is copied (the work
    is handed to shutil.copyfileobj); with length 0 nothing is touched.
    Otherwise the data is moved in 16 KiB pieces and IOError is raised
    as soon as src delivers less than was asked for.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    # First the whole chunks ...
    done = 0
    while done < full_chunks:
        data = src.read(chunk_size)
        if len(data) < chunk_size:
            raise IOError("end of file reached")
        dst.write(data)
        done += 1

    # ... then whatever is left over.
    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
175
# Lookup table used by filemode(). It holds one inner tuple per character
# of the resulting string, in output order: the file type first, then
# read/write/execute for owner, group and other. Every inner tuple lists
# (bitmask, character) pairs that are tried in the given order; the first
# pair whose bits are all set in the mode decides the character. That is
# why the combined masks (e.g. TUEXEC|TSUID) come before their parts.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000202
def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       Each output position is looked up in filemode_table: the first
       entry of a group whose bits are all present in mode supplies the
       character, and "-" is used when no entry of the group matches.
    """
    chars = []
    for group in filemode_table:
        symbol = "-"
        for mask, flag in group:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000217
# Member names inside an archive always use "/" as separator. Where the
# local separator already is "/", os.path.normpath can be used directly;
# elsewhere its result has the local separator replaced by "/".
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
222
class TarError(Exception):
    """Root of the exception hierarchy of this module."""

class ExtractError(TarError):
    """Raised for general errors during extraction."""

class ReadError(TarError):
    """Raised when a tar archive is unreadable."""

class CompressionError(TarError):
    """Raised when a compression method is unavailable."""

class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
238
239#---------------------------
240# internal stream interface
241#---------------------------
242class _LowLevelFile:
243 """Low-level file object. Supports reading and writing.
244 It is used instead of a regular file object for streaming
245 access.
246 """
247
248 def __init__(self, name, mode):
249 mode = {
250 "r": os.O_RDONLY,
251 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
252 }[mode]
253 if hasattr(os, "O_BINARY"):
254 mode |= os.O_BINARY
255 self.fd = os.open(name, mode)
256
257 def close(self):
258 os.close(self.fd)
259
260 def read(self, size):
261 return os.read(self.fd, size)
262
263 def write(self, s):
264 os.write(self.fd, s)
265
266class _Stream:
267 """Class that serves as an adapter between TarFile and
268 a stream-like object. The stream-like object only
269 needs to have a read() or write() method and is accessed
270 blockwise. Use of gzip or bzip2 compression is possible.
271 A stream-like object could be for example: sys.stdin,
272 sys.stdout, a socket, a tape device etc.
273
274 _Stream is intended to be used only internally.
275 """
276
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000277 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000278 """Construct a _Stream object.
279 """
280 self._extfileobj = True
281 if fileobj is None:
282 fileobj = _LowLevelFile(name, mode)
283 self._extfileobj = False
284
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000285 if comptype == '*':
286 # Enable transparent compression detection for the
287 # stream interface
288 fileobj = _StreamProxy(fileobj)
289 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000290
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000291 self.name = name or ""
292 self.mode = mode
293 self.comptype = comptype
294 self.fileobj = fileobj
295 self.bufsize = bufsize
296 self.buf = ""
297 self.pos = 0L
298 self.closed = False
299
300 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000301 try:
302 import zlib
303 except ImportError:
304 raise CompressionError, "zlib module is not available"
305 self.zlib = zlib
306 self.crc = zlib.crc32("")
307 if mode == "r":
308 self._init_read_gz()
309 else:
310 self._init_write_gz()
311
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000312 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000313 try:
314 import bz2
315 except ImportError:
316 raise CompressionError, "bz2 module is not available"
317 if mode == "r":
318 self.dbuf = ""
319 self.cmp = bz2.BZ2Decompressor()
320 else:
321 self.cmp = bz2.BZ2Compressor()
322
323 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000324 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325 self.close()
326
327 def _init_write_gz(self):
328 """Initialize for writing with gzip compression.
329 """
330 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
331 -self.zlib.MAX_WBITS,
332 self.zlib.DEF_MEM_LEVEL,
333 0)
334 timestamp = struct.pack("<L", long(time.time()))
335 self.__write("\037\213\010\010%s\002\377" % timestamp)
336 if self.name.endswith(".gz"):
337 self.name = self.name[:-3]
338 self.__write(self.name + NUL)
339
340 def write(self, s):
341 """Write string s to the stream.
342 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000343 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000344 self.crc = self.zlib.crc32(s, self.crc)
345 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000346 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000347 s = self.cmp.compress(s)
348 self.__write(s)
349
350 def __write(self, s):
351 """Write string s to the stream if a whole new block
352 is ready to be written.
353 """
354 self.buf += s
355 while len(self.buf) > self.bufsize:
356 self.fileobj.write(self.buf[:self.bufsize])
357 self.buf = self.buf[self.bufsize:]
358
359 def close(self):
360 """Close the _Stream object. No operation should be
361 done on it afterwards.
362 """
363 if self.closed:
364 return
365
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000366 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000367 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000368
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000369 if self.mode == "w" and self.buf:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000370 blocks, remainder = divmod(len(self.buf), self.bufsize)
371 if remainder > 0:
372 self.buf += NUL * (self.bufsize - remainder)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000373 self.fileobj.write(self.buf)
374 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000375 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000377 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000378
379 if not self._extfileobj:
380 self.fileobj.close()
381
382 self.closed = True
383
384 def _init_read_gz(self):
385 """Initialize for reading a gzip compressed fileobj.
386 """
387 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
388 self.dbuf = ""
389
390 # taken from gzip.GzipFile with some alterations
391 if self.__read(2) != "\037\213":
392 raise ReadError, "not a gzip file"
393 if self.__read(1) != "\010":
394 raise CompressionError, "unsupported compression method"
395
396 flag = ord(self.__read(1))
397 self.__read(6)
398
399 if flag & 4:
400 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
401 self.read(xlen)
402 if flag & 8:
403 while True:
404 s = self.__read(1)
405 if not s or s == NUL:
406 break
407 if flag & 16:
408 while True:
409 s = self.__read(1)
410 if not s or s == NUL:
411 break
412 if flag & 2:
413 self.__read(2)
414
415 def tell(self):
416 """Return the stream's file pointer position.
417 """
418 return self.pos
419
420 def seek(self, pos=0):
421 """Set the stream's file pointer to pos. Negative seeking
422 is forbidden.
423 """
424 if pos - self.pos >= 0:
425 blocks, remainder = divmod(pos - self.pos, self.bufsize)
426 for i in xrange(blocks):
427 self.read(self.bufsize)
428 self.read(remainder)
429 else:
430 raise StreamError, "seeking backwards is not allowed"
431 return self.pos
432
433 def read(self, size=None):
434 """Return the next size number of bytes from the stream.
435 If size is not defined, return all bytes of the stream
436 up to EOF.
437 """
438 if size is None:
439 t = []
440 while True:
441 buf = self._read(self.bufsize)
442 if not buf:
443 break
444 t.append(buf)
445 buf = "".join(t)
446 else:
447 buf = self._read(size)
448 self.pos += len(buf)
449 return buf
450
451 def _read(self, size):
452 """Return size bytes from the stream.
453 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000454 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000455 return self.__read(size)
456
457 c = len(self.dbuf)
458 t = [self.dbuf]
459 while c < size:
460 buf = self.__read(self.bufsize)
461 if not buf:
462 break
463 buf = self.cmp.decompress(buf)
464 t.append(buf)
465 c += len(buf)
466 t = "".join(t)
467 self.dbuf = t[size:]
468 return t[:size]
469
470 def __read(self, size):
471 """Return size bytes from stream. If internal buffer is empty,
472 read another block from the stream.
473 """
474 c = len(self.buf)
475 t = [self.buf]
476 while c < size:
477 buf = self.fileobj.read(self.bufsize)
478 if not buf:
479 break
480 t.append(buf)
481 c += len(buf)
482 t = "".join(t)
483 self.buf = t[size:]
484 return t[:size]
485# class _Stream
486
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000487class _StreamProxy(object):
488 """Small proxy class that enables transparent compression
489 detection for the Stream interface (mode 'r|*').
490 """
491
492 def __init__(self, fileobj):
493 self.fileobj = fileobj
494 self.buf = self.fileobj.read(BLOCKSIZE)
495
496 def read(self, size):
497 self.read = self.fileobj.read
498 return self.buf
499
500 def getcomptype(self):
501 if self.buf.startswith("\037\213\010"):
502 return "gz"
503 if self.buf.startswith("BZh91"):
504 return "bz2"
505 return "tar"
506
507 def close(self):
508 self.fileobj.close()
509# class StreamProxy
510
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000511#------------------------
512# Extraction file object
513#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.

       The object shares the archive's fileobj and seeks it to the
       required place before every read.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by `tarinfo' inside
           the open archive `tarfile'.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # where the member's data starts
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # data read ahead by readline()
        # read() is bound per instance to the variant matching the member
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           A complete line is returned with any "\r" characters directly
           before the newline removed. If no newline is found within
           size characters (or before the end of the member), at most
           size characters are returned without a newline and the rest
           stays buffered for the next call.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl < 0:
            # No complete line buffered: read ahead in small pieces until
            # a newline turns up, the size budget is used up or the
            # member is exhausted.
            budget = size - len(self.linebuffer)
            while nl < 0 and budget > 0:
                buf = self.read(min(budget, 100))
                if not buf:
                    break
                self.linebuffer += buf
                budget -= len(buf)
                nl = self.linebuffer.find("\n")

        if nl < 0 or nl >= size:
            # The line does not end within the limit. Return only what
            # fits and keep the remainder, so that no character is lost
            # and no newline is made up.
            chunk = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return chunk

        line = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while line[-1:] == "\r":
            line = line[:-1]
        return line + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Returns at most size bytes (everything that is left if size
           is None). Raises ValueError if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           Collects data section by section until size bytes are
           gathered or no further section is found. Raises ValueError
           if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.

           Returns at most size bytes taken from the section that
           contains the current position, or "" if there is none.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # data section: translate the position inside the member to
            # the position of the stored data
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # any other section is returned as NULs
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (from the start), 1 (from the current position)
           or 2 (from the end). The resulting position is clamped to
           the range 0..size, and data buffered by readline() is
           discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
670
671#------------------
672# Exported Classes
673#------------------
674class TarInfo(object):
675 """Informational class which holds the details about an
676 archive member given by a tar header block.
677 TarInfo objects are returned by TarFile.getmember(),
678 TarFile.getmembers() and TarFile.gettarinfo() and are
679 usually created internally.
680 """
681
682 def __init__(self, name=""):
683 """Construct a TarInfo object. name is the optional name
684 of the member.
685 """
686
687 self.name = name # member name (dirnames must end with '/')
688 self.mode = 0666 # file permissions
689 self.uid = 0 # user id
690 self.gid = 0 # group id
691 self.size = 0 # file size
692 self.mtime = 0 # modification time
693 self.chksum = 0 # header checksum
694 self.type = REGTYPE # member type
695 self.linkname = "" # link name
696 self.uname = "user" # user name
697 self.gname = "group" # group name
698 self.devmajor = 0 #-
699 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
700 self.prefix = "" # prefix to filename or holding information
701 # about sparse files
702
703 self.offset = 0 # the tar header starts here
704 self.offset_data = 0 # the file's data starts here
705
706 def __repr__(self):
707 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
708
Guido van Rossum75b64e62005-01-16 00:16:11 +0000709 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710 def frombuf(cls, buf):
711 """Construct a TarInfo object from a 512 byte string buffer.
712 """
713 tarinfo = cls()
Neal Norwitzd96d1012004-07-20 22:23:02 +0000714 tarinfo.name = nts(buf[0:100])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000715 tarinfo.mode = int(buf[100:108], 8)
716 tarinfo.uid = int(buf[108:116],8)
717 tarinfo.gid = int(buf[116:124],8)
Neal Norwitzd96d1012004-07-20 22:23:02 +0000718
719 # There are two possible codings for the size field we
720 # have to discriminate, see comment in tobuf() below.
721 if buf[124] != chr(0200):
722 tarinfo.size = long(buf[124:136], 8)
723 else:
724 tarinfo.size = 0L
725 for i in range(11):
726 tarinfo.size <<= 8
727 tarinfo.size += ord(buf[125 + i])
728
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000729 tarinfo.mtime = long(buf[136:148], 8)
730 tarinfo.chksum = int(buf[148:156], 8)
731 tarinfo.type = buf[156:157]
732 tarinfo.linkname = nts(buf[157:257])
733 tarinfo.uname = nts(buf[265:297])
734 tarinfo.gname = nts(buf[297:329])
735 try:
736 tarinfo.devmajor = int(buf[329:337], 8)
737 tarinfo.devminor = int(buf[337:345], 8)
738 except ValueError:
739 tarinfo.devmajor = tarinfo.devmajor = 0
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000740 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000741
742 # The prefix field is used for filenames > 100 in
743 # the POSIX standard.
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000744 # name = prefix + '/' + name
745 if tarinfo.type != GNUTYPE_SPARSE:
746 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000747
748 # Directory names should have a '/' at the end.
749 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
750 tarinfo.name += "/"
751 return tarinfo
752
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000753 def tobuf(self):
754 """Return a tar header block as a 512 byte string.
755 """
Neal Norwitzd96d1012004-07-20 22:23:02 +0000756 # Prefer the size to be encoded as 11 octal ascii digits
757 # which is the most portable. If the size exceeds this
758 # limit (>= 8 GB), encode it as an 88-bit value which is
759 # a GNU tar feature.
760 if self.size <= MAXSIZE_MEMBER:
761 size = "%011o" % self.size
762 else:
763 s = self.size
764 size = ""
765 for i in range(11):
766 size = chr(s & 0377) + size
767 s >>= 8
768 size = chr(0200) + size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000769
770 # The following code was contributed by Detlef Lannert.
771 parts = []
772 for value, fieldsize in (
Neal Norwitzd96d1012004-07-20 22:23:02 +0000773 (self.name, 100),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000774 ("%07o" % (self.mode & 07777), 8),
775 ("%07o" % self.uid, 8),
776 ("%07o" % self.gid, 8),
Neal Norwitzd96d1012004-07-20 22:23:02 +0000777 (size, 12),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000778 ("%011o" % self.mtime, 12),
779 (" ", 8),
780 (self.type, 1),
781 (self.linkname, 100),
782 (MAGIC, 6),
783 (VERSION, 2),
784 (self.uname, 32),
785 (self.gname, 32),
786 ("%07o" % self.devmajor, 8),
787 ("%07o" % self.devminor, 8),
788 (self.prefix, 155)
789 ):
790 l = len(value)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000791 parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000792
793 buf = "".join(parts)
794 chksum = calc_chksum(buf)
795 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
796 buf += (BLOCKSIZE - len(buf)) * NUL
797 self.buf = buf
798 return buf
799
800 def isreg(self):
801 return self.type in REGULAR_TYPES
802 def isfile(self):
803 return self.isreg()
804 def isdir(self):
805 return self.type == DIRTYPE
806 def issym(self):
807 return self.type == SYMTYPE
808 def islnk(self):
809 return self.type == LNKTYPE
810 def ischr(self):
811 return self.type == CHRTYPE
812 def isblk(self):
813 return self.type == BLKTYPE
814 def isfifo(self):
815 return self.type == FIFOTYPE
816 def issparse(self):
817 return self.type == GNUTYPE_SPARSE
818 def isdev(self):
819 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
820# class TarInfo
821
822class TarFile(object):
823 """The TarFile Class provides an interface to tar archives.
824 """
825
826 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
827
828 dereference = False # If true, add content of linked file to the
829 # tar file, else the link.
830
831 ignore_zeros = False # If true, skips empty or invalid blocks and
832 # continues processing.
833
834 errorlevel = 0 # If 0, fatal errors only appear in debug
835 # messages (if debug >= 0). If > 0, errors
836 # are passed to the caller as exceptions.
837
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000838 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000839 # archives (no GNU extensions!)
840
841 fileobject = ExFileObject
842
843 def __init__(self, name=None, mode="r", fileobj=None):
844 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
845 read from an existing archive, 'a' to append data to an existing
846 file or 'w' to create a new file overwriting an existing one. `mode'
847 defaults to 'r'.
848 If `fileobj' is given, it is used for reading or writing data. If it
849 can be determined, `mode' is overridden by `fileobj's mode.
850 `fileobj' is not closed, when TarFile is closed.
851 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000852 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000853
854 if len(mode) > 1 or mode not in "raw":
855 raise ValueError, "mode must be 'r', 'a' or 'w'"
856 self._mode = mode
857 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
858
859 if not fileobj:
860 fileobj = file(self.name, self.mode)
861 self._extfileobj = False
862 else:
863 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000864 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000865 if hasattr(fileobj, "mode"):
866 self.mode = fileobj.mode
867 self._extfileobj = True
868 self.fileobj = fileobj
869
870 # Init datastructures
871 self.closed = False
872 self.members = [] # list of members as TarInfo objects
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 self._loaded = False # flag if all members have been read
874 self.offset = 0L # current position in the archive file
875 self.inodes = {} # dictionary caching the inodes of
876 # archive members already added
877
878 if self._mode == "r":
879 self.firstmember = None
880 self.firstmember = self.next()
881
882 if self._mode == "a":
883 # Move to the end of the archive,
884 # before the first empty block.
885 self.firstmember = None
886 while True:
887 try:
888 tarinfo = self.next()
889 except ReadError:
890 self.fileobj.seek(0)
891 break
892 if tarinfo is None:
893 self.fileobj.seek(- BLOCKSIZE, 1)
894 break
895
896 if self._mode in "aw":
897 self._loaded = True
898
899 #--------------------------------------------------------------------------
900 # Below are the classmethods which act as alternate constructors to the
901 # TarFile class. The open() method is the only one that is needed for
902 # public use; it is the "super"-constructor and is able to select an
903 # adequate "sub"-constructor for a particular compression using the mapping
904 # from OPEN_METH.
905 #
906 # This concept allows one to subclass TarFile without losing the comfort of
907 # the super-constructor. A sub-constructor is registered and made available
908 # by adding it to the mapping in OPEN_METH.
909
Guido van Rossum75b64e62005-01-16 00:16:11 +0000910 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000911 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
912 """Open a tar archive for reading, writing or appending. Return
913 an appropriate TarFile class.
914
915 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000916 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000917 'r:' open for reading exclusively uncompressed
918 'r:gz' open for reading with gzip compression
919 'r:bz2' open for reading with bzip2 compression
920 'a' or 'a:' open for appending
921 'w' or 'w:' open for writing without compression
922 'w:gz' open for writing with gzip compression
923 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000924
925 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000926 'r|' open an uncompressed stream of tar blocks for reading
927 'r|gz' open a gzip compressed stream of tar blocks
928 'r|bz2' open a bzip2 compressed stream of tar blocks
929 'w|' open an uncompressed stream for writing
930 'w|gz' open a gzip compressed stream for writing
931 'w|bz2' open a bzip2 compressed stream for writing
932 """
933
934 if not name and not fileobj:
935 raise ValueError, "nothing to open"
936
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000937 if mode in ("r", "r:*"):
938 # Find out which *open() is appropriate for opening the file.
939 for comptype in cls.OPEN_METH:
940 func = getattr(cls, cls.OPEN_METH[comptype])
941 try:
942 return func(name, "r", fileobj)
943 except (ReadError, CompressionError):
944 continue
945 raise ReadError, "file could not be opened successfully"
946
947 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000948 filemode, comptype = mode.split(":", 1)
949 filemode = filemode or "r"
950 comptype = comptype or "tar"
951
952 # Select the *open() function according to
953 # given compression.
954 if comptype in cls.OPEN_METH:
955 func = getattr(cls, cls.OPEN_METH[comptype])
956 else:
957 raise CompressionError, "unknown compression type %r" % comptype
958 return func(name, filemode, fileobj)
959
960 elif "|" in mode:
961 filemode, comptype = mode.split("|", 1)
962 filemode = filemode or "r"
963 comptype = comptype or "tar"
964
965 if filemode not in "rw":
966 raise ValueError, "mode must be 'r' or 'w'"
967
968 t = cls(name, filemode,
969 _Stream(name, filemode, comptype, fileobj, bufsize))
970 t._extfileobj = False
971 return t
972
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000973 elif mode in "aw":
974 return cls.taropen(name, mode, fileobj)
975
976 raise ValueError, "undiscernible mode"
977
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' has to be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. The result is whatever cls(name, mode, fileobj) returns.
    """
    # Compare against the complete mode strings. A substring test like
    # `mode in "raw"' would let the empty string slip through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
985
Guido van Rossum75b64e62005-01-16 00:16:11 +0000986 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000987 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
988 """Open gzip compressed tar archive name for reading or writing.
989 Appending is not allowed.
990 """
991 if len(mode) > 1 or mode not in "rw":
992 raise ValueError, "mode must be 'r' or 'w'"
993
994 try:
995 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000996 gzip.GzipFile
997 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000998 raise CompressionError, "gzip module is not available"
999
1000 pre, ext = os.path.splitext(name)
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001001 pre = os.path.basename(pre)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001002 if ext == ".tgz":
1003 ext = ".tar"
1004 if ext == ".gz":
1005 ext = ""
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001006 tarname = pre + ext
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001007
1008 if fileobj is None:
1009 fileobj = file(name, mode + "b")
1010
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001011 if mode != "r":
1012 name = tarname
1013
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001014 try:
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001015 t = cls.taropen(tarname, mode,
1016 gzip.GzipFile(name, mode, compresslevel, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001017 )
1018 except IOError:
1019 raise ReadError, "not a gzip file"
1020 t._extfileobj = False
1021 return t
1022
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w' or if a
       `fileobj' is passed, CompressionError if the bz2 module is missing
       and ReadError if opening the archive fails with an IOError.
    """
    # Compare against the complete mode strings; a substring test like
    # `mode in "rw"' would also accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the tar archive contained in the bzip2 file:
    # "x.tbz2" -> "x.tar", "x.tar.bz2" -> "x.tar".
    pre, ext = os.path.splitext(name)
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    if fileobj is not None:
        raise ValueError("no support for external file objects")

    try:
        t = cls.taropen(tarname, mode,
                        bz2.BZ2File(name, mode, compresslevel=compresslevel))
    except IOError:
        raise ReadError("not a bzip2 file")
    t._extfileobj = False
    return t
1053
# Registry of all *open() methods: the key is the compression type, the
# value is the name of the classmethod that opens such an archive.
OPEN_METH = {
    # uncompressed tar
    "tar": "taropen",
    # gzip compressed tar
    "gz": "gzopen",
    # bzip2 compressed tar
    "bz2": "bz2open",
}
1060
1061 #--------------------------------------------------------------------------
1062 # The public methods which TarFile provides:
1063
def close(self):
    """Close the TarFile. When it was opened for writing or appending,
       two zero blocks are written first and the archive is padded with
       zeros up to a multiple of RECORDSIZE. Closing twice is harmless.
    """
    if self.closed:
        return

    if self._mode in "aw":
        trailer = BLOCKSIZE * 2
        self.fileobj.write(NUL * trailer)
        self.offset += trailer
        # fill up the end with zero-blocks
        # (like option -b20 for tar does)
        leftover = self.offset % RECORDSIZE
        if leftover > 0:
            self.fileobj.write(NUL * (RECORDSIZE - leftover))

    # A file object handed in by the caller is left open.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1083
def getmember(self, name):
    """Return the TarInfo object of the member called `name'. KeyError is
       raised when the archive has no such member. If the name occurs
       more than once, the last occurrence is taken as the most
       up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001094
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in the
       order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list requires a scan of the whole archive first.
    self._load()
    return self.members
1104
def getnames(self):
    """Return the names of the archive's members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001110
1111 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1112 """Create a TarInfo object for either the file `name' or the file
1113 object `fileobj' (using os.fstat on its file descriptor). You can
1114 modify some of the TarInfo's attributes before you add it using
1115 addfile(). If given, `arcname' specifies an alternative name for the
1116 file in the archive.
1117 """
1118 self._check("aw")
1119
1120 # When fileobj is given, replace name by
1121 # fileobj's real name.
1122 if fileobj is not None:
1123 name = fileobj.name
1124
1125 # Building the name of the member in the archive.
1126 # Backward slashes are converted to forward slashes,
1127 # Absolute paths are turned to relative paths.
1128 if arcname is None:
1129 arcname = name
1130 arcname = normpath(arcname)
1131 drv, arcname = os.path.splitdrive(arcname)
1132 while arcname[0:1] == "/":
1133 arcname = arcname[1:]
1134
1135 # Now, fill the TarInfo object with
1136 # information specific for the file.
1137 tarinfo = TarInfo()
1138
1139 # Use os.stat or os.lstat, depending on platform
1140 # and if symlinks shall be resolved.
1141 if fileobj is None:
1142 if hasattr(os, "lstat") and not self.dereference:
1143 statres = os.lstat(name)
1144 else:
1145 statres = os.stat(name)
1146 else:
1147 statres = os.fstat(fileobj.fileno())
1148 linkname = ""
1149
1150 stmd = statres.st_mode
1151 if stat.S_ISREG(stmd):
1152 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001153 if not self.dereference and \
1154 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001155 # Is it a hardlink to an already
1156 # archived file?
1157 type = LNKTYPE
1158 linkname = self.inodes[inode]
1159 else:
1160 # The inode is added only if its valid.
1161 # For win32 it is always 0.
1162 type = REGTYPE
1163 if inode[0]:
1164 self.inodes[inode] = arcname
1165 elif stat.S_ISDIR(stmd):
1166 type = DIRTYPE
1167 if arcname[-1:] != "/":
1168 arcname += "/"
1169 elif stat.S_ISFIFO(stmd):
1170 type = FIFOTYPE
1171 elif stat.S_ISLNK(stmd):
1172 type = SYMTYPE
1173 linkname = os.readlink(name)
1174 elif stat.S_ISCHR(stmd):
1175 type = CHRTYPE
1176 elif stat.S_ISBLK(stmd):
1177 type = BLKTYPE
1178 else:
1179 return None
1180
1181 # Fill the TarInfo object with all
1182 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001183 tarinfo.name = arcname
1184 tarinfo.mode = stmd
1185 tarinfo.uid = statres.st_uid
1186 tarinfo.gid = statres.st_gid
1187 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001188 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001189 else:
1190 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001191 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001192 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001193 tarinfo.linkname = linkname
1194 if pwd:
1195 try:
1196 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1197 except KeyError:
1198 pass
1199 if grp:
1200 try:
1201 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1202 except KeyError:
1203 pass
1204
1205 if type in (CHRTYPE, BLKTYPE):
1206 if hasattr(os, "major") and hasattr(os, "minor"):
1207 tarinfo.devmajor = os.major(statres.st_rdev)
1208 tarinfo.devminor = os.minor(statres.st_rdev)
1209 return tarinfo
1210
1211 def list(self, verbose=True):
1212 """Print a table of contents to sys.stdout. If `verbose' is False, only
1213 the names of the members are printed. If it is True, an `ls -l'-like
1214 output is produced.
1215 """
1216 self._check()
1217
1218 for tarinfo in self:
1219 if verbose:
1220 print filemode(tarinfo.mode),
1221 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1222 tarinfo.gname or tarinfo.gid),
1223 if tarinfo.ischr() or tarinfo.isblk():
1224 print "%10s" % ("%d,%d" \
1225 % (tarinfo.devmajor, tarinfo.devminor)),
1226 else:
1227 print "%10d" % tarinfo.size,
1228 print "%d-%02d-%02d %02d:%02d:%02d" \
1229 % time.localtime(tarinfo.mtime)[:6],
1230
1231 print tarinfo.name,
1232
1233 if verbose:
1234 if tarinfo.issym():
1235 print "->", tarinfo.linkname,
1236 if tarinfo.islnk():
1237 print "link to", tarinfo.linkname,
1238 print
1239
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        # Close the source file even if writing the member fails.
        try:
            self.addfile(tarinfo, f)
        finally:
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1291
def addfile(self, tarinfo, fileobj=None):
    """Write the TarInfo object `tarinfo' to the archive. When `fileobj'
       is passed, tarinfo.size bytes are copied from it into the archive
       after the header. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # The full linkname goes into a GNU extension member, the
        # regular header keeps a truncated copy.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last slash found within the first
            # LENGTH_PREFIX + 1 characters.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            cut = head.rfind("/") + 1
            shortname = tarinfo.name[cut:]
            prefix = head[:cut][:-1]

            if not prefix or len(shortname) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = shortname
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad the last block
    # with zeros.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, leftover = divmod(tarinfo.size, BLOCKSIZE)
        if leftover > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001358
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    directories = []

    if members is None:
        members = self

    # rwxrwxrwx
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda d: d.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable here: `path' has to stay the
        # extraction root for all the following directories.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1399
def extract(self, member, path=""):
    """Extract one member from the archive below the current working
       directory, or below `path' if given, using its full name. `member'
       is either a filename or a TarInfo object. File information is
       restored as accurately as possible. Depending on self.errorlevel,
       errors are raised or only reported through _dbg().
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(member)

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1432
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' is
       either a filename or a TarInfo object. Regular files and members
       of unknown type yield a file-like object, (sym)links yield the file
       object of their target, everything else yields None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # If a member's type is unknown, it is treated as a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1471
1472 def _extract_member(self, tarinfo, targetpath):
1473 """Extract the TarInfo object tarinfo to a physical
1474 file called targetpath.
1475 """
1476 # Fetch the TarInfo object for the given name
1477 # and build the destination pathname, replacing
1478 # forward slashes to platform specific separators.
1479 if targetpath[-1:] == "/":
1480 targetpath = targetpath[:-1]
1481 targetpath = os.path.normpath(targetpath)
1482
1483 # Create all upper directories.
1484 upperdirs = os.path.dirname(targetpath)
1485 if upperdirs and not os.path.exists(upperdirs):
1486 ti = TarInfo()
1487 ti.name = upperdirs
1488 ti.type = DIRTYPE
1489 ti.mode = 0777
1490 ti.mtime = tarinfo.mtime
1491 ti.uid = tarinfo.uid
1492 ti.gid = tarinfo.gid
1493 ti.uname = tarinfo.uname
1494 ti.gname = tarinfo.gname
1495 try:
1496 self._extract_member(ti, ti.name)
1497 except:
1498 pass
1499
1500 if tarinfo.islnk() or tarinfo.issym():
1501 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1502 else:
1503 self._dbg(1, tarinfo.name)
1504
1505 if tarinfo.isreg():
1506 self.makefile(tarinfo, targetpath)
1507 elif tarinfo.isdir():
1508 self.makedir(tarinfo, targetpath)
1509 elif tarinfo.isfifo():
1510 self.makefifo(tarinfo, targetpath)
1511 elif tarinfo.ischr() or tarinfo.isblk():
1512 self.makedev(tarinfo, targetpath)
1513 elif tarinfo.islnk() or tarinfo.issym():
1514 self.makelink(tarinfo, targetpath)
1515 elif tarinfo.type not in SUPPORTED_TYPES:
1516 self.makeunknown(tarinfo, targetpath)
1517 else:
1518 self.makefile(tarinfo, targetpath)
1519
1520 self.chown(tarinfo, targetpath)
1521 if not tarinfo.issym():
1522 self.chmod(tarinfo, targetpath)
1523 self.utime(tarinfo, targetpath)
1524
1525 #--------------------------------------------------------------------------
1526 # Below are the different file methods. They are called via
1527 # _extract_member() when extract() is called. They can be replaced in a
1528 # subclass to implement other functionality.
1529
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. A directory that already exists
       is not an error; every other failure is passed on.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno == errno.EEXIST:
            return
        raise
1538
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the member's data.
       Both file objects are closed again even if an error occurs.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1547
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, " \
           "extracted as regular file." % tarinfo.type
    self._dbg(1, note)
1555
def makefifo(self, tarinfo, targetpath):
    """Create the fifo targetpath. ExtractError is raised on systems
       whose os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1563
def makedev(self, tarinfo, targetpath):
    """Create the character or block device targetpath. ExtractError is
       raised on systems whose os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's permission bits with the device type flag.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1578
def makelink(self, tarinfo, targetpath):
    """Create the (symbolic) link targetpath. Where links cannot be made
       (platform limitation), the file the link refers to is copied
       instead. IOError is raised if that fails as well.
    """
    linkpath = tarinfo.linkname
    symbolic = tarinfo.issym()
    try:
        if symbolic:
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # A symlink's target is resolved against the directory of the
        # member itself.
        if symbolic:
            member_dir = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(member_dir, linkpath))

        # First try the archive member of that name, then fall back to
        # a file of that name in the file system.
        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
1605
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo. Nothing is done
       unless the pwd module is available and the effective user id is 0.
       ExtractError is raised if the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then our own group.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # Resolve the user the same way.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1633
def chmod(self, tarinfo, targetpath):
    """Set the permissions of targetpath to tarinfo.mode. Nothing is done
       if the os module has no chmod(). ExtractError is raised if the
       mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001642
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.
       Nothing is done if the os module has no utime(), or for
       directories on win32. ExtractError is raised if the time cannot
       be changed.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if not hasattr(os, 'utime') or \
       (sys.platform == "win32" and tarinfo.isdir()):
        return
    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1656
1657 #--------------------------------------------------------------------------
1658
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.
       ReadError is raised if already the first block of the archive
       cannot be interpreted as a tar header.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the block and try the following one.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # Look at the last character of the name only: a regular member
    # whose name ends with a slash is a directory.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        # some old tar programs don't know DIRTYPE
        tarinfo.type = DIRTYPE

    self.members.append(tarinfo)
    return tarinfo
1725
1726 #--------------------------------------------------------------------------
1727 # Below are some methods which are called for special typeflags in the
1728 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1729 # are registered in TYPE_METH below. You can register your own methods
1730 # with this mapping.
1731 # A registered method is called with a TarInfo object as only argument.
1732 #
1733 # During its execution the method MUST perform the following tasks:
1734 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1735 # if there is data to follow.
1736 # 2. set self.offset to the position where the next member's header will
1737 # begin.
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001738 # 3. append the tarinfo object to self.members, if it is supposed to appear
1739 # as a member of the TarFile object.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001740 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001741
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member. The long name is read from the data blocks
       and stored in the member that follows, which is returned.
       ReadError is raised if the archive ends before that member.
    """
    # Collect the data blocks and join them once at the end.
    chunks = []
    count = tarinfo.size
    while count > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header
    following = self.next()
    if following is None:
        # The header the long name belongs to is missing.
        raise ReadError("missing subsequent header")

    following.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        following.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        following.linkname = nts(buf)

    return following
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001764
1765 def proc_sparse(self, tarinfo):
1766 """Analyze a GNU sparse header plus extra headers.
1767 """
1768 buf = tarinfo.tobuf()
1769 sp = _ringbuffer()
1770 pos = 386
1771 lastpos = 0L
1772 realpos = 0L
1773 # There are 4 possible sparse structs in the
1774 # first header.
1775 for i in xrange(4):
1776 try:
1777 offset = int(buf[pos:pos + 12], 8)
1778 numbytes = int(buf[pos + 12:pos + 24], 8)
1779 except ValueError:
1780 break
1781 if offset > lastpos:
1782 sp.append(_hole(lastpos, offset - lastpos))
1783 sp.append(_data(offset, numbytes, realpos))
1784 realpos += numbytes
1785 lastpos = offset + numbytes
1786 pos += 24
1787
1788 isextended = ord(buf[482])
1789 origsize = int(buf[483:495], 8)
1790
1791 # If the isextended flag is given,
1792 # there are extra headers to process.
1793 while isextended == 1:
1794 buf = self.fileobj.read(BLOCKSIZE)
1795 self.offset += BLOCKSIZE
1796 pos = 0
1797 for i in xrange(21):
1798 try:
1799 offset = int(buf[pos:pos + 12], 8)
1800 numbytes = int(buf[pos + 12:pos + 24], 8)
1801 except ValueError:
1802 break
1803 if offset > lastpos:
1804 sp.append(_hole(lastpos, offset - lastpos))
1805 sp.append(_data(offset, numbytes, realpos))
1806 realpos += numbytes
1807 lastpos = offset + numbytes
1808 pos += 24
1809 isextended = ord(buf[504])
1810
1811 if lastpos < origsize:
1812 sp.append(_hole(lastpos, origsize - lastpos))
1813
1814 tarinfo.sparse = sp
1815
1816 tarinfo.offset_data = self.offset
1817 self.offset += self._block(tarinfo.size)
1818 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001819
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001820 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001821 return tarinfo
1822
1823 # The type mapping for the next() method. The keys are single character
1824 # strings, the typeflag. The values are methods which are called when
1825 # next() encounters such a typeflag.
TYPE_METH = {
    # GNU long names and long links share one handler.
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    # GNU sparse members.
    GNUTYPE_SPARSE: proc_sparse,
}
1831
1832 #--------------------------------------------------------------------------
1833 # Little helper methods:
1834
def _block(self, count):
    """Return the byte count rounded up to the next multiple of
       BLOCKSIZE, e.g. _block(834) => 1024.
    """
    full, partial = divmod(count, BLOCKSIZE)
    if partial:
        # A partly filled block still occupies a whole one.
        return (full + 1) * BLOCKSIZE
    return full * BLOCKSIZE
1843
1844 def _getmember(self, name, tarinfo=None):
1845 """Find an archive member by name from bottom to top.
1846 If tarinfo is given, it is used as the starting point.
1847 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001848 # Ensure that all members have been loaded.
1849 members = self.getmembers()
1850
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001851 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001852 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001853 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001854 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001855
1856 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001857 if name == members[i].name:
1858 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001859
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001860 def _load(self):
1861 """Read through the entire archive file and look for readable
1862 members.
1863 """
1864 while True:
1865 tarinfo = self.next()
1866 if tarinfo is None:
1867 break
1868 self._loaded = True
1869
1870 def _check(self, mode=None):
1871 """Check if TarFile is still open, and if the operation's mode
1872 corresponds to TarFile's mode.
1873 """
1874 if self.closed:
1875 raise IOError, "%s is closed" % self.__class__.__name__
1876 if mode is not None and self._mode not in mode:
1877 raise IOError, "bad operation for mode %r" % self._mode
1878
1879 def __iter__(self):
1880 """Provide an iterator object.
1881 """
1882 if self._loaded:
1883 return iter(self.members)
1884 else:
1885 return TarIter(self)
1886
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       The member is made up of an extended tar header whose size
       field holds the length of the null terminated name,
       followed by the data blocks that contain this name, padded
       with NULs up to a full block.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks
    self.fileobj.write(payload)
    padded = self._block(header.size)
    if padded > header.size:
        # Fill up the last, partially used block.
        self.fileobj.write(NUL * (padded - header.size))
    self.offset += padded
1911
1912 def _dbg(self, level, msg):
1913 """Write debugging output to sys.stderr.
1914 """
1915 if level <= self.debug:
1916 print >> sys.stderr, msg
1917# class TarFile
1918
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Set up iteration over tarfile, starting at its first
           member.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """A TarIter is its own iterator.
        """
        return self

    def next(self):
        """Return the next member. As long as the TarFile is not
           completely loaded the member is read with TarFile's
           next() method; once the end of the archive is hit the
           TarFile is marked as _loaded.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances it can
            # happen that getmembers() is called during iteration.
            # The archive is at its end then, so carry on by position
            # in the member list instead of stopping prematurely.
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
1954
1955# Helper classes for sparse file support
1956class _section:
1957 """Base class for _data and _hole.
1958 """
1959 def __init__(self, offset, size):
1960 self.offset = offset
1961 self.size = size
1962 def __contains__(self, offset):
1963 return self.offset <= offset < self.offset + self.size
1964
class _data(_section):
    """A section of a sparse file that holds data. Besides offset
       and size it carries realpos.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1971
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section and exists only as a distinct type.
    """
1976
1977class _ringbuffer(list):
1978 """Ringbuffer class which increases performance
1979 over a regular list.
1980 """
1981 def __init__(self):
1982 self.idx = 0
1983 def find(self, offset):
1984 idx = self.idx
1985 while True:
1986 item = self[idx]
1987 if offset in item:
1988 break
1989 idx += 1
1990 if idx == len(self):
1991 idx = 0
1992 if idx == self.idx:
1993 # End of File
1994 return None
1995 self.idx = idx
1996 return item
1997
1998#---------------------------------------------
1999# zipfile compatible TarFile class
2000#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       ZipFile class from the standard module zipfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen (TAR_PLAIN) or
           TarFile.gzopen (TAR_GZIPPED). When reading, every member
           gets the filename, file_size and date_time attributes.
           Raise ValueError for any other compression constant.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of all members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Let the TarFile list its contents.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive. zinfo supplies the
           filename, file_size and date_time attributes, which are
           copied to its name, size and mtime attributes first.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2048
2049#--------------------
2050# exported functions
2051#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we are
       able to handle, else return False. The archive is opened
       and closed again right away; a TarError raised on the way
       means that it is not usable.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2062
# Export TarFile.open as the module-level open() function. From here
# on the name open in this module no longer refers to the builtin;
# is_tarfile() above looks it up at call time and so uses this one.
open = TarFile.open