#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision: 60730 $"
34# $Source$
35
36version = "0.8.0"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date: 2008-02-11 10:36:07 -0800 (Mon, 11 Feb 2008) $"
39__cvsid__ = "$Id: tarfile.py 60730 2008-02-11 18:36:07Z lars.gustaebel $"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52import copy
53
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    # Call-style raise, matching every other raise statement in this module.
    raise ImportError("tarfile does not work for platform==mac")
60
61try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = "\0"                      # the null character (field padding/terminator)
BLOCKSIZE = 512                 # length of processing blocks, in bytes
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
MAGIC = "ustar"                 # magic tar string written into each header
VERSION = "00"                  # version number written after the magic

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# One-character member type flags, stored at header offset 156.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (NUL type flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file
96
97#---------------------------------------------------------
98# tarfile constants
99#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that somehow represent regular files (see TarInfo.isreg()).
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)
108
109#---------------------------------------------------------
110# Bits used in the mode field, values in octal.
111#---------------------------------------------------------
# File-type bits; filemode() tests them in exactly this order.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (filemode() renders it as "t"/"T")

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
136
def stn(s, length):
    """Return s cut to at most length characters and right-padded
       with NUL characters up to length.
    """
    truncated = s[:length]
    return truncated.ljust(length, NUL)
141
def nts(s):
    """Return the part of a string field that precedes its first
       null character (the whole field if it contains none).
    """
    # Splitting once on "\0" leaves the leading part in slot 0; a field
    # without any null character comes back unchanged.
    return s.split("\0", 1)[0]
150
151def nti(s):
152 """Convert a number field to a python number.
153 """
154 # There are two possible encodings for a number field, see
155 # itn() below.
156 if s[0] != chr(0200):
157 n = int(nts(s) or "0", 8)
158 else:
159 n = 0L
160 for i in xrange(len(s) - 1):
161 n <<= 8
162 n += ord(s[i + 1])
163 return n
164
def itn(n, digits=8, posix=False):
    """Encode a python number as a header number field of width digits.

       Raises ValueError if posix is true and n does not fit the octal
       encoding.
    """
    # POSIX 1003.1-1988 stores numbers as octal digits followed by a
    # null byte, which covers 0 .. (8**(digits-1))-1. GNU tar stores
    # anything else as a 0200 marker byte followed by digits-1 bytes of
    # big-endian binary, which covers up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least-significant first, then flip the order.
    payload = []
    for _ in range(width):
        payload.append(chr(n & 0xff))
        n >>= 8
    payload.reverse()
    return "\200" + "".join(payload)
191
def calc_chksums(buf):
    """Return (unsigned_chksum, signed_chksum) for a header block.

       All bytes of the 512 byte header are summed except the 8 byte
       chksum field at offset 148, which counts as if it was filled
       with spaces (8 * 32 = 256). According to the GNU tar sources,
       some tars (Sun and NeXT) sum the bytes as signed chars, which
       gives a different result when bytes with the high bit set are
       present, so both variants are returned.
    """
    # "8x" skips the chksum field, so one format covers both summed
    # regions (bytes 0-147 and 156-511).
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum
204
def copyfileobj(src, dst, length=None):
    """Copy exactly length bytes from fileobj src to fileobj dst.
       With length=None everything up to EOF is copied. Raises
       IOError if src runs out of data before length bytes were read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move one chunk; a short read means src ended too early.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    bufsize = 16 * 1024
    whole, rest = divmod(length, bufsize)
    while whole > 0:
        transfer(bufsize)
        whole -= 1
    if rest != 0:
        transfer(rest)
    return
229
# One inner tuple per output character of filemode(). Within a tuple the
# (bitmask, character) pairs are tried in order and the first mask that
# is fully set in the mode wins, so combined masks must come first.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)

def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for group in filemode_table:
        # Every character whose mask is completely contained in mode;
        # only the first one is used, "-" stands in when there is none.
        hits = [char for (bit, char) in group if mode & bit == bit]
        if hits:
            chars.append(hits[0])
        else:
            chars.append("-")
    return "".join(chars)
271
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path, then use "/" as separator in the result."""
        return os.path.normpath(path).replace(os.sep, "/")
276
class TarError(Exception):
    """Common base class of all tarfile exceptions."""

class ExtractError(TarError):
    """General error raised while extracting."""

class ReadError(TarError):
    """Raised for unreadable tar archives."""

class CompressionError(TarError):
    """Raised when a compression method is unavailable."""

class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
292
293#---------------------------
294# internal stream interface
295#---------------------------
296class _LowLevelFile:
297 """Low-level file object. Supports reading and writing.
298 It is used instead of a regular file object for streaming
299 access.
300 """
301
302 def __init__(self, name, mode):
303 mode = {
304 "r": os.O_RDONLY,
305 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
306 }[mode]
307 if hasattr(os, "O_BINARY"):
308 mode |= os.O_BINARY
309 self.fd = os.open(name, mode)
310
311 def close(self):
312 os.close(self.fd)
313
314 def read(self, size):
315 return os.read(self.fd, size)
316
317 def write(self, s):
318 os.write(self.fd, s)
319
320class _Stream:
321 """Class that serves as an adapter between TarFile and
322 a stream-like object. The stream-like object only
323 needs to have a read() or write() method and is accessed
324 blockwise. Use of gzip or bzip2 compression is possible.
325 A stream-like object could be for example: sys.stdin,
326 sys.stdout, a socket, a tape device etc.
327
328 _Stream is intended to be used only internally.
329 """
330
331 def __init__(self, name, mode, comptype, fileobj, bufsize):
332 """Construct a _Stream object.
333 """
334 self._extfileobj = True
335 if fileobj is None:
336 fileobj = _LowLevelFile(name, mode)
337 self._extfileobj = False
338
339 if comptype == '*':
340 # Enable transparent compression detection for the
341 # stream interface
342 fileobj = _StreamProxy(fileobj)
343 comptype = fileobj.getcomptype()
344
345 self.name = name or ""
346 self.mode = mode
347 self.comptype = comptype
348 self.fileobj = fileobj
349 self.bufsize = bufsize
350 self.buf = ""
351 self.pos = 0L
352 self.closed = False
353
354 if comptype == "gz":
355 try:
356 import zlib
357 except ImportError:
358 raise CompressionError("zlib module is not available")
359 self.zlib = zlib
360 self.crc = zlib.crc32("")
361 if mode == "r":
362 self._init_read_gz()
363 else:
364 self._init_write_gz()
365
366 if comptype == "bz2":
367 try:
368 import bz2
369 except ImportError:
370 raise CompressionError("bz2 module is not available")
371 if mode == "r":
372 self.dbuf = ""
373 self.cmp = bz2.BZ2Decompressor()
374 else:
375 self.cmp = bz2.BZ2Compressor()
376
377 def __del__(self):
378 if hasattr(self, "closed") and not self.closed:
379 self.close()
380
381 def _init_write_gz(self):
382 """Initialize for writing with gzip compression.
383 """
384 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
385 -self.zlib.MAX_WBITS,
386 self.zlib.DEF_MEM_LEVEL,
387 0)
388 timestamp = struct.pack("<L", long(time.time()))
389 self.__write("\037\213\010\010%s\002\377" % timestamp)
390 if self.name.endswith(".gz"):
391 self.name = self.name[:-3]
392 self.__write(self.name + NUL)
393
394 def write(self, s):
395 """Write string s to the stream.
396 """
397 if self.comptype == "gz":
398 self.crc = self.zlib.crc32(s, self.crc)
399 self.pos += len(s)
400 if self.comptype != "tar":
401 s = self.cmp.compress(s)
402 self.__write(s)
403
404 def __write(self, s):
405 """Write string s to the stream if a whole new block
406 is ready to be written.
407 """
408 self.buf += s
409 while len(self.buf) > self.bufsize:
410 self.fileobj.write(self.buf[:self.bufsize])
411 self.buf = self.buf[self.bufsize:]
412
413 def close(self):
414 """Close the _Stream object. No operation should be
415 done on it afterwards.
416 """
417 if self.closed:
418 return
419
420 if self.mode == "w" and self.comptype != "tar":
421 self.buf += self.cmp.flush()
422
423 if self.mode == "w" and self.buf:
424 self.fileobj.write(self.buf)
425 self.buf = ""
426 if self.comptype == "gz":
427 # The native zlib crc is an unsigned 32-bit integer, but
428 # the Python wrapper implicitly casts that to a signed C
429 # long. So, on a 32-bit box self.crc may "look negative",
430 # while the same crc on a 64-bit box may "look positive".
431 # To avoid irksome warnings from the `struct` module, force
432 # it to look positive on all boxes.
433 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
434 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
435
436 if not self._extfileobj:
437 self.fileobj.close()
438
439 self.closed = True
440
441 def _init_read_gz(self):
442 """Initialize for reading a gzip compressed fileobj.
443 """
444 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
445 self.dbuf = ""
446
447 # taken from gzip.GzipFile with some alterations
448 if self.__read(2) != "\037\213":
449 raise ReadError("not a gzip file")
450 if self.__read(1) != "\010":
451 raise CompressionError("unsupported compression method")
452
453 flag = ord(self.__read(1))
454 self.__read(6)
455
456 if flag & 4:
457 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
458 self.read(xlen)
459 if flag & 8:
460 while True:
461 s = self.__read(1)
462 if not s or s == NUL:
463 break
464 if flag & 16:
465 while True:
466 s = self.__read(1)
467 if not s or s == NUL:
468 break
469 if flag & 2:
470 self.__read(2)
471
472 def tell(self):
473 """Return the stream's file pointer position.
474 """
475 return self.pos
476
477 def seek(self, pos=0):
478 """Set the stream's file pointer to pos. Negative seeking
479 is forbidden.
480 """
481 if pos - self.pos >= 0:
482 blocks, remainder = divmod(pos - self.pos, self.bufsize)
483 for i in xrange(blocks):
484 self.read(self.bufsize)
485 self.read(remainder)
486 else:
487 raise StreamError("seeking backwards is not allowed")
488 return self.pos
489
490 def read(self, size=None):
491 """Return the next size number of bytes from the stream.
492 If size is not defined, return all bytes of the stream
493 up to EOF.
494 """
495 if size is None:
496 t = []
497 while True:
498 buf = self._read(self.bufsize)
499 if not buf:
500 break
501 t.append(buf)
502 buf = "".join(t)
503 else:
504 buf = self._read(size)
505 self.pos += len(buf)
506 return buf
507
508 def _read(self, size):
509 """Return size bytes from the stream.
510 """
511 if self.comptype == "tar":
512 return self.__read(size)
513
514 c = len(self.dbuf)
515 t = [self.dbuf]
516 while c < size:
517 buf = self.__read(self.bufsize)
518 if not buf:
519 break
520 buf = self.cmp.decompress(buf)
521 t.append(buf)
522 c += len(buf)
523 t = "".join(t)
524 self.dbuf = t[size:]
525 return t[:size]
526
527 def __read(self, size):
528 """Return size bytes from stream. If internal buffer is empty,
529 read another block from the stream.
530 """
531 c = len(self.buf)
532 t = [self.buf]
533 while c < size:
534 buf = self.fileobj.read(self.bufsize)
535 if not buf:
536 break
537 t.append(buf)
538 c += len(buf)
539 t = "".join(t)
540 self.buf = t[size:]
541 return t[:size]
542# class _Stream
543
544class _StreamProxy(object):
545 """Small proxy class that enables transparent compression
546 detection for the Stream interface (mode 'r|*').
547 """
548
549 def __init__(self, fileobj):
550 self.fileobj = fileobj
551 self.buf = self.fileobj.read(BLOCKSIZE)
552
553 def read(self, size):
554 self.read = self.fileobj.read
555 return self.buf
556
557 def getcomptype(self):
558 if self.buf.startswith("\037\213\010"):
559 return "gz"
560 if self.buf.startswith("BZh91"):
561 return "bz2"
562 return "tar"
563
564 def close(self):
565 self.fileobj.close()
566# class StreamProxy
567
568class _BZ2Proxy(object):
569 """Small proxy class that enables external file object
570 support for "r:bz2" and "w:bz2" modes. This is actually
571 a workaround for a limitation in bz2 module's BZ2File
572 class which (unlike gzip.GzipFile) has no support for
573 a file object argument.
574 """
575
576 blocksize = 16 * 1024
577
578 def __init__(self, fileobj, mode):
579 self.fileobj = fileobj
580 self.mode = mode
581 self.init()
582
583 def init(self):
584 import bz2
585 self.pos = 0
586 if self.mode == "r":
587 self.bz2obj = bz2.BZ2Decompressor()
588 self.fileobj.seek(0)
589 self.buf = ""
590 else:
591 self.bz2obj = bz2.BZ2Compressor()
592
593 def read(self, size):
594 b = [self.buf]
595 x = len(self.buf)
596 while x < size:
597 try:
598 raw = self.fileobj.read(self.blocksize)
599 data = self.bz2obj.decompress(raw)
600 b.append(data)
601 except EOFError:
602 break
603 x += len(data)
604 self.buf = "".join(b)
605
606 buf = self.buf[:size]
607 self.buf = self.buf[size:]
608 self.pos += len(buf)
609 return buf
610
611 def seek(self, pos):
612 if pos < self.pos:
613 self.init()
614 self.read(pos - self.pos)
615
616 def tell(self):
617 return self.pos
618
619 def write(self, data):
620 self.pos += len(data)
621 raw = self.bz2obj.compress(data)
622 self.fileobj.write(raw)
623
624 def close(self):
625 if self.mode == "w":
626 raw = self.bz2obj.flush()
627 self.fileobj.write(raw)
628 self.fileobj.close()
629# class _BZ2Proxy
630
631#------------------------
632# Extraction file object
633#------------------------
634class _FileInFile(object):
635 """A thin wrapper around an existing file object that
636 provides a part of its data as an individual file
637 object.
638 """
639
640 def __init__(self, fileobj, offset, size, sparse=None):
641 self.fileobj = fileobj
642 self.offset = offset
643 self.size = size
644 self.sparse = sparse
645 self.position = 0
646
647 def tell(self):
648 """Return the current file position.
649 """
650 return self.position
651
652 def seek(self, position):
653 """Seek to a position in the file.
654 """
655 self.position = position
656
657 def read(self, size=None):
658 """Read data from the file.
659 """
660 if size is None:
661 size = self.size - self.position
662 else:
663 size = min(size, self.size - self.position)
664
665 if self.sparse is None:
666 return self.readnormal(size)
667 else:
668 return self.readsparse(size)
669
670 def readnormal(self, size):
671 """Read operation for regular files.
672 """
673 self.fileobj.seek(self.offset + self.position)
674 self.position += size
675 return self.fileobj.read(size)
676
677 def readsparse(self, size):
678 """Read operation for sparse files.
679 """
680 data = []
681 while size > 0:
682 buf = self.readsparsesection(size)
683 if not buf:
684 break
685 size -= len(buf)
686 data.append(buf)
687 return "".join(data)
688
689 def readsparsesection(self, size):
690 """Read a single section of a sparse file.
691 """
692 section = self.sparse.find(self.position)
693
694 if section is None:
695 return ""
696
697 size = min(size, section.offset + section.size - self.position)
698
699 if isinstance(section, _data):
700 realpos = section.realpos + self.position - section.offset
701 self.fileobj.seek(self.offset + realpos)
702 self.position += size
703 return self.fileobj.read(size)
704 else:
705 self.position += size
706 return NUL * size
707#class _FileInFile
708
709
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024            # bytes fetched per read in readline()

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0       # position as seen by the caller
        self.buffer = ""        # data read ahead by readline()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Serve data that readline() already fetched first.
        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. A negative size
           or None means no limit.
           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read ahead block by block until a newline or EOF shows up.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the result; comparing against
        # -1 alone would turn other negative values into a negative
        # slice index and cut the line from the wrong end.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position is
           clamped to the range 0..size.
           Raises ValueError if the file is closed or whence is unknown.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data no longer matches the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
837
838#------------------
839# Exported Classes
840#------------------
841class TarInfo(object):
842 """Informational class which holds the details about an
843 archive member given by a tar header block.
844 TarInfo objects are returned by TarFile.getmember(),
845 TarFile.getmembers() and TarFile.gettarinfo() and are
846 usually created internally.
847 """
848
849 def __init__(self, name=""):
850 """Construct a TarInfo object. name is the optional name
851 of the member.
852 """
853 self.name = name # member name (dirnames must end with '/')
854 self.mode = 0666 # file permissions
855 self.uid = 0 # user id
856 self.gid = 0 # group id
857 self.size = 0 # file size
858 self.mtime = 0 # modification time
859 self.chksum = 0 # header checksum
860 self.type = REGTYPE # member type
861 self.linkname = "" # link name
862 self.uname = "user" # user name
863 self.gname = "group" # group name
864 self.devmajor = 0 # device major number
865 self.devminor = 0 # device minor number
866
867 self.offset = 0 # the tar header starts here
868 self.offset_data = 0 # the file's data starts here
869
870 def __repr__(self):
871 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
872
873 @classmethod
874 def frombuf(cls, buf):
875 """Construct a TarInfo object from a 512 byte string buffer.
876 """
877 if len(buf) != BLOCKSIZE:
878 raise ValueError("truncated header")
879 if buf.count(NUL) == BLOCKSIZE:
880 raise ValueError("empty header")
881
882 tarinfo = cls()
883 tarinfo.buf = buf
884 tarinfo.name = nts(buf[0:100])
885 tarinfo.mode = nti(buf[100:108])
886 tarinfo.uid = nti(buf[108:116])
887 tarinfo.gid = nti(buf[116:124])
888 tarinfo.size = nti(buf[124:136])
889 tarinfo.mtime = nti(buf[136:148])
890 tarinfo.chksum = nti(buf[148:156])
891 tarinfo.type = buf[156:157]
892 tarinfo.linkname = nts(buf[157:257])
893 tarinfo.uname = nts(buf[265:297])
894 tarinfo.gname = nts(buf[297:329])
895 tarinfo.devmajor = nti(buf[329:337])
896 tarinfo.devminor = nti(buf[337:345])
897 prefix = nts(buf[345:500])
898
899 if prefix and not tarinfo.issparse():
900 tarinfo.name = prefix + "/" + tarinfo.name
901
902 if tarinfo.chksum not in calc_chksums(buf):
903 raise ValueError("invalid header")
904 return tarinfo
905
906 def tobuf(self, posix=False):
907 """Return a tar header as a string of 512 byte blocks.
908 """
909 buf = ""
910 type = self.type
911 prefix = ""
912
913 if self.name.endswith("/"):
914 type = DIRTYPE
915
916 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
917 # Prevent "././@LongLink" from being normalized.
918 name = self.name
919 else:
920 name = normpath(self.name)
921
922 if type == DIRTYPE:
923 # directories should end with '/'
924 name += "/"
925
926 linkname = self.linkname
927 if linkname:
928 # if linkname is empty we end up with a '.'
929 linkname = normpath(linkname)
930
931 if posix:
932 if self.size > MAXSIZE_MEMBER:
933 raise ValueError("file is too large (>= 8 GB)")
934
935 if len(self.linkname) > LENGTH_LINK:
936 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
937
938 if len(name) > LENGTH_NAME:
939 prefix = name[:LENGTH_PREFIX + 1]
940 while prefix and prefix[-1] != "/":
941 prefix = prefix[:-1]
942
943 name = name[len(prefix):]
944 prefix = prefix[:-1]
945
946 if not prefix or len(name) > LENGTH_NAME:
947 raise ValueError("name is too long")
948
949 else:
950 if len(self.linkname) > LENGTH_LINK:
951 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
952
953 if len(name) > LENGTH_NAME:
954 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
955
956 parts = [
957 stn(name, 100),
958 itn(self.mode & 07777, 8, posix),
959 itn(self.uid, 8, posix),
960 itn(self.gid, 8, posix),
961 itn(self.size, 12, posix),
962 itn(self.mtime, 12, posix),
963 " ", # checksum field
964 type,
965 stn(self.linkname, 100),
966 stn(MAGIC, 6),
967 stn(VERSION, 2),
968 stn(self.uname, 32),
969 stn(self.gname, 32),
970 itn(self.devmajor, 8, posix),
971 itn(self.devminor, 8, posix),
972 stn(prefix, 155)
973 ]
974
975 buf += "".join(parts).ljust(BLOCKSIZE, NUL)
976 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
977 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
978 self.buf = buf
979 return buf
980
981 def _create_gnulong(self, name, type):
982 """Create a GNU longname/longlink header from name.
983 It consists of an extended tar header, with the length
984 of the longname as size, followed by data blocks,
985 which contain the longname as a null terminated string.
986 """
987 name += NUL
988
989 tarinfo = self.__class__()
990 tarinfo.name = "././@LongLink"
991 tarinfo.type = type
992 tarinfo.mode = 0
993 tarinfo.size = len(name)
994
995 # create extended header
996 buf = tarinfo.tobuf()
997 # create name blocks
998 buf += name
999 blocks, remainder = divmod(len(name), BLOCKSIZE)
1000 if remainder > 0:
1001 buf += (BLOCKSIZE - remainder) * NUL
1002 return buf
1003
1004 def isreg(self):
1005 return self.type in REGULAR_TYPES
1006 def isfile(self):
1007 return self.isreg()
1008 def isdir(self):
1009 return self.type == DIRTYPE
1010 def issym(self):
1011 return self.type == SYMTYPE
1012 def islnk(self):
1013 return self.type == LNKTYPE
1014 def ischr(self):
1015 return self.type == CHRTYPE
1016 def isblk(self):
1017 return self.type == BLKTYPE
1018 def isfifo(self):
1019 return self.type == FIFOTYPE
1020 def issparse(self):
1021 return self.type == GNUTYPE_SPARSE
1022 def isdev(self):
1023 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1024# class TarInfo
1025
1026class TarFile(object):
1027 """The TarFile Class provides an interface to tar archives.
1028 """
1029
1030 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1031
1032 dereference = False # If true, add content of linked file to the
1033 # tar file, else the link.
1034
1035 ignore_zeros = False # If true, skips empty or invalid blocks and
1036 # continues processing.
1037
1038 errorlevel = 0 # If 0, fatal errors only appear in debug
1039 # messages (if debug >= 0). If > 0, errors
1040 # are passed to the caller as exceptions.
1041
1042 posix = False # If True, generates POSIX.1-1990-compliant
1043 # archives (no GNU extensions!)
1044
1045 fileobject = ExFileObject
1046
1047 def __init__(self, name=None, mode="r", fileobj=None):
1048 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1049 read from an existing archive, 'a' to append data to an existing
1050 file or 'w' to create a new file overwriting an existing one. `mode'
1051 defaults to 'r'.
1052 If `fileobj' is given, it is used for reading or writing data. If it
1053 can be determined, `mode' is overridden by `fileobj's mode.
1054 `fileobj' is not closed, when TarFile is closed.
1055 """
1056 if len(mode) > 1 or mode not in "raw":
1057 raise ValueError("mode must be 'r', 'a' or 'w'")
1058 self._mode = mode
1059 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1060
1061 if not fileobj:
1062 fileobj = file(name, self.mode)
1063 self._extfileobj = False
1064 else:
1065 if name is None and hasattr(fileobj, "name"):
1066 name = fileobj.name
1067 if hasattr(fileobj, "mode"):
1068 self.mode = fileobj.mode
1069 self._extfileobj = True
1070 self.name = os.path.abspath(name) if name else None
1071 self.fileobj = fileobj
1072
1073 # Init datastructures
1074 self.closed = False
1075 self.members = [] # list of members as TarInfo objects
1076 self._loaded = False # flag if all members have been read
1077 self.offset = self.fileobj.tell()
1078 # current position in the archive file
1079 self.inodes = {} # dictionary caching the inodes of
1080 # archive members already added
1081
1082 if self._mode == "r":
1083 self.firstmember = None
1084 self.firstmember = self.next()
1085
1086 if self._mode == "a":
1087 # Move to the end of the archive,
1088 # before the first empty block.
1089 self.firstmember = None
1090 while True:
1091 try:
1092 tarinfo = self.next()
1093 except ReadError:
1094 self.fileobj.seek(0)
1095 break
1096 if tarinfo is None:
1097 self.fileobj.seek(- BLOCKSIZE, 1)
1098 break
1099
1100 if self._mode in "aw":
1101 self._loaded = True
1102
1103 #--------------------------------------------------------------------------
1104 # Below are the classmethods which act as alternate constructors to the
1105 # TarFile class. The open() method is the only one that is needed for
1106 # public use; it is the "super"-constructor and is able to select an
1107 # adequate "sub"-constructor for a particular compression using the mapping
1108 # from OPEN_METH.
1109 #
1110 # This concept allows one to subclass TarFile without losing the comfort of
1111 # the super-constructor. A sub-constructor is registered and made available
1112 # by adding it to the mapping in OPEN_METH.
1113
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type, and ReadError if no registered opener accepts
       the file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                # Remember where we started so that the next opener
                # sees the same data after a failed attempt.
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against a tuple: a substring test against "rw" would
        # also let the two-character mode "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream wrapper was created here, so close() must close it.
        t._extfileobj = False
        return t

    # Tuple membership again: with a substring test the empty string
    # and "aw" would be treated as valid modes.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
1185
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else raises
       ValueError. Returns cls(name, mode, fileobj).
    """
    # Tuple membership instead of a substring test against "raw", which
    # would accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1193
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module cannot be used, and ReadError
       if taropen() fails with an IOError on the gzip stream.
    """
    # Tuple membership instead of a substring test against "rw", which
    # would accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj)

    try:
        t = cls.taropen(name, mode, fileobj)
    except IOError:
        fileobj.close()
        raise ReadError("not a gzip file")
    # The GzipFile wrapper was created here, so close() must close it.
    t._extfileobj = False
    return t
1217
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing, and ReadError if
       taropen() fails with an IOError on the bzip2 stream.
    """
    # Tuple membership instead of a substring test against "rw", which
    # would accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if fileobj is not None:
        # Wrap the caller's file object; it is not ours to close on error.
        fileobj = _BZ2Proxy(fileobj, mode)
        extfileobj = True
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
        extfileobj = False

    try:
        t = cls.taropen(name, mode, fileobj)
    except IOError:
        if not extfileobj:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    t._extfileobj = False
    return t
1246
# All *open() methods are registered here.
# Keys are the compression names accepted after ':' in open()'s mode
# string; values are the names of the classmethods that open() looks up
# with getattr() and calls.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1253
1254 #--------------------------------------------------------------------------
1255 # The public methods which TarFile provides:
1256
def close(self):
    """Finish and close the archive.

       In append or write mode two zero blocks are written and the
       output is then padded with zeros up to a multiple of RECORDSIZE.
       The underlying file object is closed unless it is flagged as
       external (_extfileobj). Closing an already closed TarFile does
       nothing.
    """
    if not self.closed:
        if self._mode in "aw":
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))

        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1276
def getmember(self, name):
    """Look up the member called `name' via _getmember() and return its
       TarInfo object. KeyError is raised when the lookup yields None.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
1287
def getmembers(self):
    """Return the list of TarInfo objects held in self.members. If the
       archive has not been read completely yet, _load() is called
       first so that the list is complete.
    """
    self._check()
    if self._loaded:
        return self.members
    # Obtaining all members requires scanning the whole archive.
    self._load()
    return self.members
1297
def getnames(self):
    """Return the names of all members as a list, in the order in which
       getmembers() returns the members.
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
1303
1304 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1305 """Create a TarInfo object for either the file `name' or the file
1306 object `fileobj' (using os.fstat on its file descriptor). You can
1307 modify some of the TarInfo's attributes before you add it using
1308 addfile(). If given, `arcname' specifies an alternative name for the
1309 file in the archive.
1310 """
1311 self._check("aw")
1312
1313 # When fileobj is given, replace name by
1314 # fileobj's real name.
1315 if fileobj is not None:
1316 name = fileobj.name
1317
1318 # Building the name of the member in the archive.
1319 # Backward slashes are converted to forward slashes,
1320 # Absolute paths are turned to relative paths.
1321 if arcname is None:
1322 arcname = name
1323 arcname = normpath(arcname)
1324 drv, arcname = os.path.splitdrive(arcname)
1325 while arcname[0:1] == "/":
1326 arcname = arcname[1:]
1327
1328 # Now, fill the TarInfo object with
1329 # information specific for the file.
1330 tarinfo = TarInfo()
1331
1332 # Use os.stat or os.lstat, depending on platform
1333 # and if symlinks shall be resolved.
1334 if fileobj is None:
1335 if hasattr(os, "lstat") and not self.dereference:
1336 statres = os.lstat(name)
1337 else:
1338 statres = os.stat(name)
1339 elif hasattr(os, 'fstat'):
1340 statres = os.fstat(fileobj.fileno())
1341 else:
1342 raise NotImplementedError('fileobj argument not supported on this '
1343 'platform (no os.fstat)')
1344 linkname = ""
1345
1346 stmd = statres.st_mode
1347 if stat.S_ISREG(stmd):
1348 inode = (statres.st_ino, statres.st_dev)
1349 if not self.dereference and \
1350 statres.st_nlink > 1 and inode in self.inodes:
1351 # Is it a hardlink to an already
1352 # archived file?
1353 type = LNKTYPE
1354 linkname = self.inodes[inode]
1355 else:
1356 # The inode is added only if its valid.
1357 # For win32 it is always 0.
1358 type = REGTYPE
1359 if inode[0]:
1360 self.inodes[inode] = arcname
1361 elif stat.S_ISDIR(stmd):
1362 type = DIRTYPE
1363 if arcname[-1:] != "/":
1364 arcname += "/"
1365 elif stat.S_ISFIFO(stmd):
1366 type = FIFOTYPE
1367 elif stat.S_ISLNK(stmd):
1368 type = SYMTYPE
1369 linkname = os.readlink(name)
1370 elif stat.S_ISCHR(stmd):
1371 type = CHRTYPE
1372 elif stat.S_ISBLK(stmd):
1373 type = BLKTYPE
1374 else:
1375 return None
1376
1377 # Fill the TarInfo object with all
1378 # information we can get.
1379 tarinfo.name = arcname
1380 tarinfo.mode = stmd
1381 tarinfo.uid = statres.st_uid
1382 tarinfo.gid = statres.st_gid
1383 if stat.S_ISREG(stmd):
1384 tarinfo.size = statres.st_size
1385 else:
1386 tarinfo.size = 0L
1387 tarinfo.mtime = statres.st_mtime
1388 tarinfo.type = type
1389 tarinfo.linkname = linkname
1390 if pwd:
1391 try:
1392 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1393 except KeyError:
1394 pass
1395 if grp:
1396 try:
1397 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1398 except KeyError:
1399 pass
1400
1401 if type in (CHRTYPE, BLKTYPE):
1402 if hasattr(os, "major") and hasattr(os, "minor"):
1403 tarinfo.devmajor = os.major(statres.st_rdev)
1404 tarinfo.devminor = os.minor(statres.st_rdev)
1405 return tarinfo
1406
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    # One output line per member. The trailing commas keep the pieces
    # on the same line until the bare `print' at the end of the loop.
    for tarinfo in self:
        if verbose:
            print filemode(tarinfo.mode),
            # Fall back to the numeric ids when no names are stored.
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
1435
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of an unsupported type are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the handle even if writing to the archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1486
def addfile(self, tarinfo, fileobj=None):
    """Append a copy of `tarinfo' to the archive. When `fileobj' is
       given, tarinfo.size bytes are copied from it after the header and
       the data is padded with NUL bytes to a full block.
       TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so the caller's object is not the one stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        nblocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(member)
1512
def extractall(self, path=".", members=None):
    """Extract every member of `members' (default: the whole archive)
       below the directory `path'. Directories are first created with a
       restrictive mode; their owner, modification time and permissions
       are applied once all members have been extracted. `members' must
       be a subset of the list returned by getmembers().
    """
    # NOTE(review): member names are joined to `path' without any
    # validation here - confirm that callers only pass trusted archives.
    if members is None:
        members = self

    pending_dirs = []
    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode.
            pending_dirs.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = stat.S_IRWXU
        self.extract(tarinfo, path)

    # Reverse sort directories.
    pending_dirs.sort(key=lambda entry: entry.name)
    pending_dirs.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in pending_dirs:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1549
def extract(self, member, path=""):
    """Extract one member below the directory `path' (default: the
       current working directory), using its full name. `member' may be
       a filename or a TarInfo object. Depending on self.errorlevel,
       errors are either re-raised or only reported through _dbg().
    """
    self._check("r")

    tarinfo = member if isinstance(member, TarInfo) \
        else self.getmember(member)

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination)
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1582
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may be
       a filename or a TarInfo object. Regular files and members of an
       unknown type yield a self.fileobject instance; for a (sym)link
       the file object of its target is returned; every other kind of
       member yields None. StreamError is raised for links when the
       archive is read from a _Stream.
    """
    self._check("r")

    tarinfo = member if isinstance(member, TarInfo) \
        else self.getmember(member)

    # If a member's type is unknown, it is treated as a
    # regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1621
1622 def _extract_member(self, tarinfo, targetpath):
1623 """Extract the TarInfo object tarinfo to a physical
1624 file called targetpath.
1625 """
1626 # Fetch the TarInfo object for the given name
1627 # and build the destination pathname, replacing
1628 # forward slashes to platform specific separators.
1629 if targetpath[-1:] == "/":
1630 targetpath = targetpath[:-1]
1631 targetpath = os.path.normpath(targetpath)
1632
1633 # Create all upper directories.
1634 upperdirs = os.path.dirname(targetpath)
1635 if upperdirs and not os.path.exists(upperdirs):
1636 # Create directories that are not part of the archive with
1637 # default permissions.
1638 os.makedirs(upperdirs)
1639
1640 if tarinfo.islnk() or tarinfo.issym():
1641 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1642 else:
1643 self._dbg(1, tarinfo.name)
1644
1645 if tarinfo.isreg():
1646 self.makefile(tarinfo, targetpath)
1647 elif tarinfo.isdir():
1648 self.makedir(tarinfo, targetpath)
1649 elif tarinfo.isfifo():
1650 self.makefifo(tarinfo, targetpath)
1651 elif tarinfo.ischr() or tarinfo.isblk():
1652 self.makedev(tarinfo, targetpath)
1653 elif tarinfo.islnk() or tarinfo.issym():
1654 self.makelink(tarinfo, targetpath)
1655 elif tarinfo.type not in SUPPORTED_TYPES:
1656 self.makeunknown(tarinfo, targetpath)
1657 else:
1658 self.makefile(tarinfo, targetpath)
1659
1660 self.chown(tarinfo, targetpath)
1661 if not tarinfo.issym():
1662 self.chmod(tarinfo, targetpath)
1663 self.utime(tarinfo, targetpath)
1664
1665 #--------------------------------------------------------------------------
1666 # Below are the different file methods. They are called via
1667 # _extract_member() when extract() is called. They can be replaced in a
1668 # subclass to implement other functionality.
1669
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing path
       (EEXIST) is not treated as an error.
    """
    try:
        # Use a safe mode for the directory, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, stat.S_IRWXU)
    except EnvironmentError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1680
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through extractfile() and copied
       into a newly created file. Both file objects are closed even if
       creating the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1689
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1697
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. ExtractError is raised when the
       os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1705
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'.
       ExtractError is raised when the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    mode = tarinfo.mode | devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1720
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at `targetpath'. When that raises
       AttributeError (e.g. the platform has no os.symlink/os.link),
       the referenced member is extracted in its place, or, failing
       that, the referenced file is copied from the filesystem.
       IOError is raised if none of this works.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        pass

    # Fallback: make a copy of the referenced file instead of a link.
    if tarinfo.issym():
        linkpath = os.path.join(os.path.dirname(tarinfo.name), linkpath)
        linkpath = normpath(linkpath)

    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        linkpath = os.path.normpath(linkpath)
        try:
            shutil.copy2(linkpath, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1747
def chown(self, tarinfo, targetpath):
    """Change the owner of `targetpath' to the user and group recorded
       in `tarinfo'. Nothing is done unless the effective uid is 0.
       Names are tried first, then the numeric ids, and finally the ids
       of the current process. ExtractError is raised if the ownership
       change itself fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1775
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to `targetpath'.
       Does nothing when the os module has no chmod(); a failing
       os.chmod() is reported as ExtractError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
1784
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
       tarinfo.mtime. Does nothing when the os module has no utime(),
       or for directories on win32; a failing os.utime() is reported as
       ExtractError.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    skip_directory = sys.platform == "win32" and tarinfo.isdir()
    if skip_directory:
        return
    try:
        stamp = (tarinfo.mtime, tarinfo.mtime)
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1798
1799 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError if the very first block (offset 0) cannot be
       parsed and ignore_zeros is not set.
    """
    self._check("ra")
    # A member that was stored in self.firstmember is handed out once
    # before any further reading takes place.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read: end of the archive file.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the unparsable block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # An invalid block at the very start means this is not a
                # readable tar file; later on it is taken as the end.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have a '/' at the end.
    if tarinfo.isdir() and not tarinfo.name.endswith("/"):
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1853
1854 #--------------------------------------------------------------------------
1855 # The following are methods that are called depending on the type of a
1856 # member. The entry point is proc_member() which is called with a TarInfo
1857 # object created from the header block from the current offset. The
1858 # proc_member() method can be overridden in a subclass to add custom
1859 # proc_*() methods. A proc_*() method MUST implement the following
1860 # operations:
1861 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1862 # if there is data that follows.
1863 # 2. Set self.offset to the position where the next member's header will
1864 # begin.
1865 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch `tarinfo' to the proc_*() method responsible for its
       type and return that method's result.
    """
    if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif tarinfo.type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1876
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type, which
       is treated like a regular file. Records where the member's data
       starts and advances self.offset past the data blocks.
    """
    tarinfo.offset_data = self.offset
    carries_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if carries_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
1886
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member: read the blocks that
       hold the long string, process the header that follows them and
       store the string as that member's name or linkname.
    """
    buf = ""
    remaining = tarinfo.size
    while remaining > 0:
        buf += self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
1915
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a _ringbuffer of _data and _hole sections from the sparse
       structs, stores it in tarinfo.sparse, advances self.offset past
       the stored data and sets tarinfo.size to the original file size
       read from the header.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    pos = 386           # byte position of the first sparse struct in buf
    lastpos = 0L        # end of the last section in the original file
    realpos = 0L        # running total of data bytes seen so far
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        # Each struct is 24 bytes: a 12-byte offset and a 12-byte size.
        try:
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            break
        # A gap between the previous section and this one is a hole.
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extension block is scanned for up to 21 sparse structs.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # A trailing hole covers the rest up to the original size.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    # tarinfo.size still holds the stored size here; it is replaced by
    # the original size only after the offset has been advanced.
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
1972
1973 #--------------------------------------------------------------------------
1974 # Little helper methods:
1975
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    excess = count % BLOCKSIZE
    if excess:
        return count - excess + BLOCKSIZE
    return count
1984
1985 def _getmember(self, name, tarinfo=None):
1986 """Find an archive member by name from bottom to top.
1987 If tarinfo is given, it is used as the starting point.
1988 """
1989 # Ensure that all members have been loaded.
1990 members = self.getmembers()
1991
1992 if tarinfo is None:
1993 end = len(members)
1994 else:
1995 end = members.index(tarinfo)
1996
1997 for i in xrange(end - 1, -1, -1):
1998 if name == members[i].name:
1999 return members[i]
2000
2001 def _load(self):
2002 """Read through the entire archive file and look for readable
2003 members.
2004 """
2005 while True:
2006 tarinfo = self.next()
2007 if tarinfo is None:
2008 break
2009 self._loaded = True
2010
2011 def _check(self, mode=None):
2012 """Check if TarFile is still open, and if the operation's mode
2013 corresponds to TarFile's mode.
2014 """
2015 if self.closed:
2016 raise IOError("%s is closed" % self.__class__.__name__)
2017 if mode is not None and self._mode not in mode:
2018 raise IOError("bad operation for mode %r" % self._mode)
2019
2020 def __iter__(self):
2021 """Provide an iterator object.
2022 """
2023 if self._loaded:
2024 return iter(self.members)
2025 else:
2026 return TarIter(self)
2027
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       `msg' is printed only if `level' does not exceed self.debug.
    """
    if level <= self.debug:
        print >> sys.stderr, msg
2033# class TarFile
2034
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for `tarfile', starting at the first
           member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not completely
           loaded, members come from its next() method; once that is
           exhausted the TarFile is flagged as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile
        if archive._loaded:
            # Everything is in the member list already, so continue
            # from our own position in it.
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2070
2071# Helper classes for sparse file support
2072class _section:
2073 """Base class for _data and _hole.
2074 """
2075 def __init__(self, offset, size):
2076 self.offset = offset
2077 self.size = size
2078 def __contains__(self, offset):
2079 return self.offset <= offset < self.offset + self.size
2080
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is stored
       in addition to the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos
2087
class _hole(_section):
    """A section of a sparse file that is a hole. Adds nothing to
       _section; the class itself marks the section kind.
    """
2092
2093class _ringbuffer(list):
2094 """Ringbuffer class which increases performance
2095 over a regular list.
2096 """
2097 def __init__(self):
2098 self.idx = 0
2099 def find(self, offset):
2100 idx = self.idx
2101 while True:
2102 item = self[idx]
2103 if offset in item:
2104 break
2105 idx += 1
2106 if idx == len(self):
2107 idx = 0
2108 if idx == self.idx:
2109 # End of File
2110 return None
2111 self.idx = idx
2112 return item
2113
2114#---------------------------------------------
2115# zipfile compatible TarFile class
2116#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            # Give every member the attribute names used by zipfile.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [member.name for member in self.infolist()]

    def infolist(self):
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Translate the zipfile attribute names back to TarInfo names.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        self.tarfile.close()
2163#class TarFileCompat
2164
2165#--------------------
2166# exported functions
2167#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened (and closed again) with
       open() without a TarError being raised, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2178
# Module-level alias for TarFile.open. From this line on the name `open'
# in this module refers to TarFile.open, not to the builtin.
open = TarFile.open