blob: 8987ca709e7e54649ba2db7cf7ebf208f2ce79ef [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    # Refuse to import at all rather than produce broken archives.
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1    # maximum size of a file (11 octal digits,
                                # i.e. 77777777777 in octal)

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
107
#---------------------------------------------------------
# Bits used in the mode field. The literals are written in
# hexadecimal so they read the same on every Python version;
# the traditional octal value is given in each comment.
#---------------------------------------------------------
S_IFLNK = 0xA000        # symbolic link            (octal 120000)
S_IFREG = 0x8000        # regular file             (octal 100000)
S_IFBLK = 0x6000        # block device             (octal 060000)
S_IFDIR = 0x4000        # directory                (octal 040000)
S_IFCHR = 0x2000        # character device         (octal 020000)
S_IFIFO = 0x1000        # fifo                     (octal 010000)

TSUID   = 0x800         # set UID on execution     (octal 4000)
TSGID   = 0x400         # set GID on execution     (octal 2000)
TSVTX   = 0x200         # reserved                 (octal 1000)

TUREAD  = 0x100         # read by owner            (octal 400)
TUWRITE = 0x080         # write by owner           (octal 200)
TUEXEC  = 0x040         # execute/search by owner  (octal 100)
TGREAD  = 0x020         # read by group            (octal 040)
TGWRITE = 0x010         # write by group           (octal 020)
TGEXEC  = 0x008         # execute/search by group  (octal 010)
TOREAD  = 0x004         # read by other            (octal 004)
TOWRITE = 0x002         # write by other           (octal 002)
TOEXEC  = 0x001         # execute/search by other  (octal 001)
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    The result is the part of `s` that precedes the first NUL. Bytes
    that follow the terminator inside a fixed-width field are discarded
    instead of being returned as part of the value. A buffer that
    contains no NUL at all is returned unchanged.
    """
    end = s.find(NUL)
    if end < 0:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    The returned buffer is exactly `length` bytes long: struct.pack()
    truncates or NUL-pads `s` to `length - 1` bytes and one terminating
    NUL is appended.
    """
    payload = struct.pack("%ds" % (length - 1), s)
    return payload + NUL
144
def nti(s):
    """Convert a number field to a python number.

    There are two possible encodings for a number field, see itn():

    * a string of octal digits, terminated and/or padded with NULs and
      spaces; an empty or all-blank field counts as 0;
    * a leading 0x80 byte (0200 octal) followed by the value as a
      big-endian base-256 number.

    Raises ValueError if an octal field holds anything but octal digits.
    """
    if s[:1] != chr(0x80):
        # Cut at the first NUL, then drop space padding. This accepts
        # "digits NUL space", "digits space NUL" and all-blank fields
        # alike.
        end = s.find(NUL)
        if end >= 0:
            s = s[:end]
        return int(s.strip() or "0", 8)

    n = 0
    for char in s[1:]:
        n = (n << 8) + ord(char)
    return n
158
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field.

    `digits` is the total width of the field in bytes. If `posix` is
    true, a value that does not fit the octal encoding raises ValueError
    instead of falling back to the GNU base-256 encoding.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0x80 byte (0200 octal) indicates this
    # particular encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Emit the low-order digits-1 bytes, most significant byte first.
    chars = []
    for i in range(digits - 1):
        chars.append(chr(n & 0xFF))
        n >>= 8
    chars.reverse()
    return chr(0x80) + "".join(chars)
185
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum). Only the
       first 512 bytes of `buf` are looked at.
    """
    header = buf[:512]
    # "8x" skips the 8-byte chksum field at offset 148. The constant 256
    # stands in for that field filled with spaces (8 * 0x20).
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000198
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before `length` bytes
       have been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)

    # Full-sized chunks first, then whatever is left over.
    chunk_sizes = [BUFSIZE] * blocks
    if remainder != 0:
        chunk_sizes.append(remainder)

    for chunk_size in chunk_sizes:
        buf = src.read(chunk_size)
        if len(buf) < chunk_size:
            raise IOError("end of file reached")
        dst.write(buf)
    return
223
# One inner tuple per character of the "-rwxrwxrwx" string, in output
# order. Each inner tuple lists (bits, character) candidates; filemode()
# takes the first candidate whose bits are all set in the mode, so the
# more specific combinations must be listed before the bits they contain.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000250
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for candidates in filemode_table:
        # The first candidate whose bits are all present in the mode
        # supplies the character for this position.
        for bit, char in candidates:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            # No candidate matched.
            perm.append("-")
    return "".join(perm)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
if os.sep != "/":
    def normpath(path):
        """Normalize `path` and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
else:
    # The local separator already is "/", nothing to convert.
    normpath = os.path.normpath
270
class TarError(Exception):
    """Base exception."""
    pass


class ExtractError(TarError):
    """General exception for extract errors."""
    pass


class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass


class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
286
287#---------------------------
288# internal stream interface
289#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file `name` through os.open(). `mode` must be "r"
           (read only) or "w" (create/truncate for writing); any other
           value raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # Request rw-rw-rw- (subject to the umask) for newly created
        # files. Without an explicit mode os.open() would also request
        # the execute bits.
        perms = (stat.S_IRUSR | stat.S_IWUSR |
                 stat.S_IRGRP | stat.S_IWGRP |
                 stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perms)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return at most `size` bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of `s` to the file descriptor."""
        # os.write() returns the number of bytes it actually wrote, so
        # keep going until nothing is left.
        while s:
            written = os.write(self.fd, s)
            s = s[written:]
313
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened with _LowLevelFile if `fileobj`
                       is None, and stored in the gzip header on write
           mode     -- "r" or "w"
           comptype -- "tar" (no compression), "gz", "bz2", or "*" to
                       detect the compression from the data (reading)
           fileobj  -- stream-like object to use, or None
           bufsize  -- block size used to access the underlying object
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""      # raw data not yet written / not yet consumed
        self.pos      = 0       # position in the uncompressed stream
        self.closed   = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # `closed` is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        # magic, method "deflate", FNAME flag, mtime, extra flags, OS
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            if self.comptype == "tar":
                # Only an uncompressed stream is padded to a multiple
                # of bufsize. Padding compressed output would put NULs
                # between the compressed data and the gzip trailer.
                blocks, remainder = divmod(len(self.buf), self.bufsize)
                if remainder > 0:
                    self.buf += NUL * (self.bufsize - remainder)
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # gzip trailer: CRC32 and uncompressed size, both as
                # unsigned 32-bit little-endian values. Masking keeps
                # the CRC in range even when crc32() reports it as a
                # negative number.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)          # skip mtime, extra flags and OS byte

        if flag & 4:
            # FEXTRA: a length-prefixed field. It belongs to the gzip
            # header, so it has to be skipped with the raw __read() and
            # must not be fed through the decompressor.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 16-bit header checksum
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos < 0:
            raise StreamError("seeking backwards is not allowed")

        # Seeking forward means reading and discarding the data.
        blocks, remainder = divmod(pos - self.pos, self.bufsize)
        for i in range(blocks):
            self.read(self.bufsize)
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            chunks = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                chunks.append(buf)
            buf = "".join(chunks)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress raw blocks until enough data is available and keep
        # the surplus in dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
534
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap `fileobj` and read its first block ahead so that
           getcomptype() can inspect it.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead by the constructor,
           whatever `size` is. Every later read() goes straight to
           the wrapped object.
        """
        # Shadow this method with the real read() from now on.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar", depending on the magic bytes at
           the start of the stream.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one block-size digit
        # ("1" to "9") and the block magic "1AY&SY". Testing for the
        # literal "BZh91" would only recognize block size 9.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped object."""
        self.fileobj.close()
# class StreamProxy
558
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000559#------------------------
560# Extraction file object
561#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by `tarinfo`, which
           takes its data from `tarfile.fileobj`.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the member's data
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # read-ahead of readline()
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           A complete line is returned with a single trailing "\\n";
           carriage returns directly in front of the newline are
           removed. If `size` is reached before a newline is found, at
           most `size` characters are returned as they are and the rest
           stays buffered for the next call. An empty string means end
           of file.
        """
        if size < 0:
            limit = None
        else:
            limit = size

        # Fill the line buffer in chunks of up to 100 characters until
        # it holds a newline, the limit is reached or the data ends.
        nl = self.linebuffer.find("\n")
        while nl < 0 and (limit is None or len(self.linebuffer) < limit):
            if limit is None:
                want = 100
            else:
                want = min(limit - len(self.linebuffer), 100)
            buf = self.read(want)
            if not buf:
                break
            self.linebuffer += buf
            nl = self.linebuffer.find("\n")

        if nl < 0 or (limit is not None and nl >= limit):
            # No complete line within reach: hand out what is allowed
            # and keep the remainder. Nothing is dropped or invented.
            if limit is None:
                cut = len(self.linebuffer)
            else:
                cut = min(limit, len(self.linebuffer))
            s = self.linebuffer[:cut]
            self.linebuffer = self.linebuffer[cut:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files. If size is None or
           negative, everything up to the end of the member is read.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files. If size is None or
           negative, everything up to the end of the member is read.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # Never read across the end of the current section.
        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data sections are read from the archive.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is returned as NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. The resulting position is
           clamped to the range 0..size. whence is 0 (absolute),
           1 (relative to the current position) or 2 (relative to the
           end).
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
718
719#------------------
720# Exported Classes
721#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name (dirnames must end with '/')
        self.mode = 0x1B6       # file permissions (0666 octal, rw-rw-rw-)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number
        self.prefix = ""        # prefix to filename or information
                                # about sparse files

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,
                                   self.name, id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises ValueError if the buffer does not have exactly
           BLOCKSIZE bytes ("truncated header"), consists of NULs only
           ("empty header") or if the stored checksum matches neither
           the unsigned nor the signed sum of the block
           ("invalid header").
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        # buf[257:265] (magic and version) is not evaluated here.
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        # The prefix is kept raw, including its NUL padding.
        tarinfo.prefix = buf[345:500]

        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header block as a 512 byte string.

           If posix is true, a number that does not fit the octal
           encoding of its field makes itn() raise ValueError. The
           block is also stored in self.buf.
        """
        parts = [
            stn(self.name, 100),
            itn(self.mode & 0xFFF, 8, posix),   # 07777 octal
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            # The checksum field is 8 bytes wide and counts as spaces
            # while the checksum is computed. Any other width would
            # shift every field that follows.
            " " * 8,
            self.type,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            # NOTE(review): stn() reserves the last byte for the NUL, so
            # a two character VERSION is written as one character plus
            # NUL here -- confirm that this is intended.
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(self.prefix, 155)
        ]

        # Pad the header with NULs to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf)[0]
        # Six octal digits and a NUL replace the first seven bytes of
        # the checksum field; its eighth byte stays a space.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        self.buf = buf
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES

    def isfile(self):
        return self.isreg()

    def isdir(self):
        return self.type == DIRTYPE

    def issym(self):
        return self.type == SYMTYPE

    def islnk(self):
        return self.type == LNKTYPE

    def ischr(self):
        return self.type == CHRTYPE

    def isblk(self):
        return self.type == BLKTYPE

    def isfifo(self):
        return self.type == FIFOTYPE

    def issparse(self):
        return self.type == GNUTYPE_SPARSE

    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
836
837class TarFile(object):
838 """The TarFile Class provides an interface to tar archives.
839 """
840
841 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
842
843 dereference = False # If true, add content of linked file to the
844 # tar file, else the link.
845
846 ignore_zeros = False # If true, skips empty or invalid blocks and
847 # continues processing.
848
849 errorlevel = 0 # If 0, fatal errors only appear in debug
850 # messages (if debug >= 0). If > 0, errors
851 # are passed to the caller as exceptions.
852
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000853 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000854 # archives (no GNU extensions!)
855
856 fileobject = ExFileObject
857
858 def __init__(self, name=None, mode="r", fileobj=None):
859 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
860 read from an existing archive, 'a' to append data to an existing
861 file or 'w' to create a new file overwriting an existing one. `mode'
862 defaults to 'r'.
863 If `fileobj' is given, it is used for reading or writing data. If it
864 can be determined, `mode' is overridden by `fileobj's mode.
865 `fileobj' is not closed, when TarFile is closed.
866 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000867 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000868
869 if len(mode) > 1 or mode not in "raw":
870 raise ValueError, "mode must be 'r', 'a' or 'w'"
871 self._mode = mode
872 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
873
874 if not fileobj:
875 fileobj = file(self.name, self.mode)
876 self._extfileobj = False
877 else:
878 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000879 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000880 if hasattr(fileobj, "mode"):
881 self.mode = fileobj.mode
882 self._extfileobj = True
883 self.fileobj = fileobj
884
885 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +0000886 self.closed = False
887 self.members = [] # list of members as TarInfo objects
888 self._loaded = False # flag if all members have been read
889 self.offset = 0L # current position in the archive file
890 self.inodes = {} # dictionary caching the inodes of
891 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000892
893 if self._mode == "r":
894 self.firstmember = None
895 self.firstmember = self.next()
896
897 if self._mode == "a":
898 # Move to the end of the archive,
899 # before the first empty block.
900 self.firstmember = None
901 while True:
902 try:
903 tarinfo = self.next()
904 except ReadError:
905 self.fileobj.seek(0)
906 break
907 if tarinfo is None:
908 self.fileobj.seek(- BLOCKSIZE, 1)
909 break
910
911 if self._mode in "aw":
912 self._loaded = True
913
914 #--------------------------------------------------------------------------
915 # Below are the classmethods which act as alternate constructors to the
916 # TarFile class. The open() method is the only one that is needed for
917 # public use; it is the "super"-constructor and is able to select an
918 # adequate "sub"-constructor for a particular compression using the mapping
919 # from OPEN_METH.
920 #
921 # This concept allows one to subclass TarFile without losing the comfort of
922 # the super-constructor. A sub-constructor is registered and made available
923 # by adding it to the mapping in OPEN_METH.
924
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an instance of the class this method is called on.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       `name' is the path of the archive and `fileobj' an already opened
       file object; at least one of the two must be given. `bufsize' is
       only used by the stream ('|') modes.

       Raises ValueError if nothing to open was given or the mode cannot
       be interpreted, CompressionError for an unregistered compression
       type and ReadError if no registered opener accepts the archive.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # A rejected attempt may already have consumed data from an
        # external file object, so remember where it started and rewind
        # before the next opener is tried. Objects that cannot report
        # their position are used as they are.
        saved_pos = None
        if fileobj is not None:
            try:
                saved_pos = fileobj.tell()
            except (AttributeError, EnvironmentError):
                saved_pos = None

        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if saved_pos is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against the allowed values themselves; a substring
        # test would also let "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream wrapper was created here, so close() has to
        # close it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
992
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open the uncompressed tar archive `name' for reading, appending
       or writing and return an instance of the class.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. `fileobj' is handed to the constructor unchanged.
    """
    # Compare against the allowed values themselves: a substring test
    # ('mode not in "raw"') would accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1000
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open the gzip compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       `fileobj' may be an already opened file object holding the
       compressed data; in that case `name' may be None.
       `compresslevel' is passed on to gzip.GzipFile.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is unusable and ReadError if
       opening fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the contained tar archive: "x.tgz" becomes
    # "x.tar", "x.tar.gz" becomes "x.tar". Without a name (archive
    # given only as a file object) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    # Remember whether the underlying file is ours, so that it can be
    # closed again if the archive turns out to be unusable.
    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj))
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    # The GzipFile wrapper was created here, so close() has to close it.
    t._extfileobj = False
    return t
1037
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open the bzip2 compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       External file objects are not supported: passing `fileobj'
       raises ValueError. `compresslevel' is passed on to bz2.BZ2File.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing and ReadError if
       opening fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Reject file objects before `name' is touched, so that a call
    # with name=None reports the real problem.
    if fileobj is not None:
        raise ValueError("no support for external file objects")

    # Derive the name of the contained tar archive: "x.tbz2" becomes
    # "x.tar", "x.tar.bz2" becomes "x.tar".
    pre, ext = os.path.splitext(name)
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    try:
        bzfile = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    except IOError:
        raise ReadError("not a bzip2 file")
    try:
        t = cls.taropen(tarname, mode, bzfile)
    except IOError:
        # Do not leak the compressed file when the archive is rejected.
        bzfile.close()
        raise ReadError("not a bzip2 file")
    # The BZ2File was created here, so close() has to close it.
    t._extfileobj = False
    return t
1068
# All *open() methods are registered here.
# Each key is a compression type as it appears after ':' in a mode string
# (an empty type is mapped to "tar" by open()); each value is the name of
# the classmethod that open() looks up with getattr() to open such an
# archive. A subclass can support another format by adding an entry.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1075
1076 #--------------------------------------------------------------------------
1077 # The public methods which TarFile provides:
1078
def close(self):
    """Close the TarFile. Calling it on an already closed archive does
       nothing.

       In write or append mode two zero blocks are written as end
       marker and the archive is padded with zeros up to a multiple of
       RECORDSIZE. The underlying file object is closed only if it is
       not flagged as external.
    """
    if self.closed:
        return

    if self._mode in "aw":
        out = self.fileobj
        # End-of-archive marker.
        out.write(NUL * (BLOCKSIZE * 2))
        self.offset += (BLOCKSIZE * 2)
        # Pad the last record with zero blocks
        # (like option -b20 for tar does).
        leftover = self.offset % RECORDSIZE
        if leftover > 0:
            out.write(NUL * (RECORDSIZE - leftover))

    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1098
def getmember(self, name):
    """Return the TarInfo object for member `name'. KeyError is raised
       if the lookup through _getmember() finds nothing.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001109
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects.

       If the archive has not been loaded completely yet, _load() is
       called first so that the list is complete. The list itself
       (self.members) is returned, not a copy.
    """
    self._check()
    if self._loaded:
        return self.members
    # A full member list requires scanning the whole archive once.
    self._load()
    return self.members
1119
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001125
1126 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1127 """Create a TarInfo object for either the file `name' or the file
1128 object `fileobj' (using os.fstat on its file descriptor). You can
1129 modify some of the TarInfo's attributes before you add it using
1130 addfile(). If given, `arcname' specifies an alternative name for the
1131 file in the archive.
1132 """
1133 self._check("aw")
1134
1135 # When fileobj is given, replace name by
1136 # fileobj's real name.
1137 if fileobj is not None:
1138 name = fileobj.name
1139
1140 # Building the name of the member in the archive.
1141 # Backward slashes are converted to forward slashes,
1142 # Absolute paths are turned to relative paths.
1143 if arcname is None:
1144 arcname = name
1145 arcname = normpath(arcname)
1146 drv, arcname = os.path.splitdrive(arcname)
1147 while arcname[0:1] == "/":
1148 arcname = arcname[1:]
1149
1150 # Now, fill the TarInfo object with
1151 # information specific for the file.
1152 tarinfo = TarInfo()
1153
1154 # Use os.stat or os.lstat, depending on platform
1155 # and if symlinks shall be resolved.
1156 if fileobj is None:
1157 if hasattr(os, "lstat") and not self.dereference:
1158 statres = os.lstat(name)
1159 else:
1160 statres = os.stat(name)
1161 else:
1162 statres = os.fstat(fileobj.fileno())
1163 linkname = ""
1164
1165 stmd = statres.st_mode
1166 if stat.S_ISREG(stmd):
1167 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001168 if not self.dereference and \
1169 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001170 # Is it a hardlink to an already
1171 # archived file?
1172 type = LNKTYPE
1173 linkname = self.inodes[inode]
1174 else:
1175 # The inode is added only if its valid.
1176 # For win32 it is always 0.
1177 type = REGTYPE
1178 if inode[0]:
1179 self.inodes[inode] = arcname
1180 elif stat.S_ISDIR(stmd):
1181 type = DIRTYPE
1182 if arcname[-1:] != "/":
1183 arcname += "/"
1184 elif stat.S_ISFIFO(stmd):
1185 type = FIFOTYPE
1186 elif stat.S_ISLNK(stmd):
1187 type = SYMTYPE
1188 linkname = os.readlink(name)
1189 elif stat.S_ISCHR(stmd):
1190 type = CHRTYPE
1191 elif stat.S_ISBLK(stmd):
1192 type = BLKTYPE
1193 else:
1194 return None
1195
1196 # Fill the TarInfo object with all
1197 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001198 tarinfo.name = arcname
1199 tarinfo.mode = stmd
1200 tarinfo.uid = statres.st_uid
1201 tarinfo.gid = statres.st_gid
1202 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001203 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001204 else:
1205 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001206 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001207 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001208 tarinfo.linkname = linkname
1209 if pwd:
1210 try:
1211 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1212 except KeyError:
1213 pass
1214 if grp:
1215 try:
1216 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1217 except KeyError:
1218 pass
1219
1220 if type in (CHRTYPE, BLKTYPE):
1221 if hasattr(os, "major") and hasattr(os, "minor"):
1222 tarinfo.devmajor = os.major(statres.st_rdev)
1223 tarinfo.devminor = os.minor(statres.st_rdev)
1224 return tarinfo
1225
def list(self, verbose=True):
    """Print a table of contents to sys.stdout, one member per line.

       If `verbose' is False, only the names of the members are
       printed. If it is True, an `ls -l'-like output is produced:
       mode, owner/group, size (or major,minor for devices),
       modification time, name and, for links, the link target.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # Every print below ends with a comma, so the fields are
            # written on one line, separated by single spaces.
            print filemode(tarinfo.mode),
            # Fall back to the numeric ids when no names are stored.
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the line of this member.
        print
1254
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of unsupported type are
       skipped with a debug message instead of raising an error.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory. Only its entries are added, "." itself
    # gets no member of its own.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the source file even if writing the member fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1306
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       Note that `tarinfo' is modified in place: its name and linkname
       are normalized and possibly shortened. In posix mode ValueError
       is raised for members that are too large or whose name or
       linkname is too long; otherwise the over-long name or linkname
       is handed to _create_gnulong() first.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError, "file is too large (>= 8 GB)"
        else:
            # Only logged here; the header itself is produced by
            # tarinfo.tobuf() below.
            self._dbg(2, "tarfile: Created GNU tar largefile header")


    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError, "linkname is too long (>%d)" \
                              % (LENGTH_LINK)
        else:
            # Hand the full linkname to _create_gnulong() and keep a
            # truncated copy in the regular header field.
            self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
            tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at a '/' into prefix and name. Start with
            # the longest candidate prefix and cut it back to the last
            # '/' it contains.
            prefix = tarinfo.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            name = tarinfo.name[len(prefix):]
            # Drop the separating '/' from the prefix.
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError, "name is too long (>%d)" \
                                  % (LENGTH_NAME)

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            # Same scheme as for long linknames above.
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header; the offset is advanced by one block for it.
    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Pad the data with NULs up to a full block.
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001373
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while the directory attributes are set
       is re-raised only if self.errorlevel is greater than 1,
       otherwise it is reported through _dbg().
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode (0777), so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name),
                            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda entry: entry.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable: rebinding `path' here would make
        # each following directory be joined onto the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1414
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001415 def extract(self, member, path=""):
1416 """Extract a member from the archive to the current working directory,
1417 using its full name. Its file information is extracted as accurately
1418 as possible. `member' may be a filename or a TarInfo object. You can
1419 specify a different directory using `path'.
1420 """
1421 self._check("r")
1422
1423 if isinstance(member, TarInfo):
1424 tarinfo = member
1425 else:
1426 tarinfo = self.getmember(member)
1427
Neal Norwitza4f651a2004-07-20 22:07:44 +00001428 # Prepare the link target for makelink().
1429 if tarinfo.islnk():
1430 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1431
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001432 try:
1433 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1434 except EnvironmentError, e:
1435 if self.errorlevel > 0:
1436 raise
1437 else:
1438 if e.filename is None:
1439 self._dbg(1, "tarfile: %s" % e.strerror)
1440 else:
1441 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1442 except ExtractError, e:
1443 if self.errorlevel > 1:
1444 raise
1445 else:
1446 self._dbg(1, "tarfile: %s" % e)
1447
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may
       be a filename or a TarInfo object.

       Regular files and members of unknown type yield a
       self.fileobject instance; a (sym)link yields the file object
       of its target; every other member (directory, device, fifo)
       yields None. StreamError is raised for a (sym)link when the
       archive is read from a _Stream.
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files, and unknown types which are treated like them.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Members without data (directory, chrdev, blkdev, etc.).
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Resolving a link means looking up another member, which the
    # non-seekable stream of tar blocks cannot do.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1486
1487 def _extract_member(self, tarinfo, targetpath):
1488 """Extract the TarInfo object tarinfo to a physical
1489 file called targetpath.
1490 """
1491 # Fetch the TarInfo object for the given name
1492 # and build the destination pathname, replacing
1493 # forward slashes to platform specific separators.
1494 if targetpath[-1:] == "/":
1495 targetpath = targetpath[:-1]
1496 targetpath = os.path.normpath(targetpath)
1497
1498 # Create all upper directories.
1499 upperdirs = os.path.dirname(targetpath)
1500 if upperdirs and not os.path.exists(upperdirs):
1501 ti = TarInfo()
1502 ti.name = upperdirs
1503 ti.type = DIRTYPE
1504 ti.mode = 0777
1505 ti.mtime = tarinfo.mtime
1506 ti.uid = tarinfo.uid
1507 ti.gid = tarinfo.gid
1508 ti.uname = tarinfo.uname
1509 ti.gname = tarinfo.gname
1510 try:
1511 self._extract_member(ti, ti.name)
1512 except:
1513 pass
1514
1515 if tarinfo.islnk() or tarinfo.issym():
1516 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1517 else:
1518 self._dbg(1, tarinfo.name)
1519
1520 if tarinfo.isreg():
1521 self.makefile(tarinfo, targetpath)
1522 elif tarinfo.isdir():
1523 self.makedir(tarinfo, targetpath)
1524 elif tarinfo.isfifo():
1525 self.makefifo(tarinfo, targetpath)
1526 elif tarinfo.ischr() or tarinfo.isblk():
1527 self.makedev(tarinfo, targetpath)
1528 elif tarinfo.islnk() or tarinfo.issym():
1529 self.makelink(tarinfo, targetpath)
1530 elif tarinfo.type not in SUPPORTED_TYPES:
1531 self.makeunknown(tarinfo, targetpath)
1532 else:
1533 self.makefile(tarinfo, targetpath)
1534
1535 self.chown(tarinfo, targetpath)
1536 if not tarinfo.issym():
1537 self.chmod(tarinfo, targetpath)
1538 self.utime(tarinfo, targetpath)
1539
1540 #--------------------------------------------------------------------------
1541 # Below are the different file methods. They are called via
1542 # _extract_member() when extract() is called. They can be replaced in a
1543 # subclass to implement other functionality.
1544
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath. An already existing
       targetpath is not an error; every other failure is re-raised.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1553
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member `tarinfo'. Both the member's file object and the
       target file are closed again, also when copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1562
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were
       a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file.") % tarinfo.type
    self._dbg(1, message)
1570
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised where
       the os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1578
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
       ExtractError is raised where the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # The device kind is passed to mknod() as part of the mode.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1593
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be
       created (platform limitation), a copy of the referenced file
       is made instead: first from the archive, then from the file
       system. IOError is raised if that fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # The target of a hard link is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        # Linking is not available; fall back to copying below.
        pass

    # The target of a symbolic link is given relative to the
    # directory of the link itself.
    if tarinfo.issym():
        linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                         linkpath))

    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        try:
            shutil.copy2(os.path.normpath(linkpath), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1620
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is done
       unless the process runs with effective uid 0. ExtractError is
       raised if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, finally the group of
    # the current process.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # Resolve the user the same way.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1648
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
       Nothing is done where the os module offers no chmod();
       ExtractError is raised if changing the mode fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001657
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo; the
       access time is set to the same value. ExtractError is raised
       if the times cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1671
1672 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised if the very first header block (offset 0)
       cannot be parsed and self.ignore_zeros is not set. Every member
       returned is also appended to self.members.
    """
    self._check("ra")
    # A member stored in self.firstmember is handed out once, before
    # anything is read from the file.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # End of file.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the unusable block and try the following one.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # A bad block at the very beginning is reported as an
                # error; anywhere else it ends the iteration.
                if self.offset == 0:
                    raise ReadError, str(e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1729
1730 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001731 # The following are methods that are called depending on the type of a
1732 # member. The entry point is proc_member() which is called with a TarInfo
1733 # object created from the header block from the current offset. The
1734 # proc_member() method can be overridden in a subclass to add custom
1735 # proc_*() methods. A proc_*() method MUST implement the following
1736 # operations:
1737 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1738 # if there is data that follows.
1739 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001740 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001741 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method responsible for its
       type and return that method's result.
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1752
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type
       which is treated as a regular file. Records where its data
       starts and moves self.offset behind the data blocks, if any.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001762
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname or longlink
       member: read the long string, process the header that follows
       and return that member with the long string filled in.
    """
    # Collect the data blocks of this member.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information. It takes over the offset of the
    # longname/longlink header.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001791
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a list of _hole and _data sections from the sparse
       structs, stores it in tarinfo.sparse, replaces tarinfo.size by
       the original file size read from the header and returns
       tarinfo.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    # Byte position of the first sparse struct inside the header block.
    pos = 386
    # lastpos: end of the last data section within the original file;
    # realpos: running total of data bytes seen so far, handed to
    # _data() as its third argument.
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        try:
            # Each struct consists of two 12 byte number fields.
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            # A field that is not a number ends the list.
            break
        if offset > lastpos:
            # The gap before this data section is a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    # Flag byte and original file size from the first header.
    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extra header block is scanned for up to 21 structs.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # The flag of an extra header is read from byte 504.
        isextended = ord(buf[504])

    # A file that ends with a hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # tarinfo.size still holds the size taken from the header here; it
    # is used to skip the data blocks before being replaced by origsize.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    # Clear the prefix field so that it is not used
    # as a pathname in next().
    tarinfo.prefix = ""

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001852
1853 #--------------------------------------------------------------------------
1854 # Little helper methods:
1855
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024. A count that already is a multiple
       of BLOCKSIZE is returned unchanged.
    """
    full, rest = divmod(count, BLOCKSIZE)
    if not rest:
        return full * BLOCKSIZE
    # A partially filled block still occupies a whole block.
    return (full + 1) * BLOCKSIZE
1864
1865 def _getmember(self, name, tarinfo=None):
1866 """Find an archive member by name from bottom to top.
1867 If tarinfo is given, it is used as the starting point.
1868 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001869 # Ensure that all members have been loaded.
1870 members = self.getmembers()
1871
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001872 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001873 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001874 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001875 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001876
1877 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001878 if name == members[i].name:
1879 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001880
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001881 def _load(self):
1882 """Read through the entire archive file and look for readable
1883 members.
1884 """
1885 while True:
1886 tarinfo = self.next()
1887 if tarinfo is None:
1888 break
1889 self._loaded = True
1890
1891 def _check(self, mode=None):
1892 """Check if TarFile is still open, and if the operation's mode
1893 corresponds to TarFile's mode.
1894 """
1895 if self.closed:
1896 raise IOError, "%s is closed" % self.__class__.__name__
1897 if mode is not None and self._mode not in mode:
1898 raise IOError, "bad operation for mode %r" % self._mode
1899
1900 def __iter__(self):
1901 """Provide an iterator object.
1902 """
1903 if self._loaded:
1904 return iter(self.members)
1905 else:
1906 return TarIter(self)
1907
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       The member is an extended tar header named "././@LongLink"
       whose size is the length of the null terminated name,
       followed by the data blocks holding that name, padded with
       NULs to a multiple of BLOCKSIZE.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks
    self.fileobj.write(payload)
    full, tail = divmod(header.size, BLOCKSIZE)
    if tail > 0:
        # Fill the last, partially used block.
        self.fileobj.write(NUL * (BLOCKSIZE - tail))
        full += 1
    self.offset += full * BLOCKSIZE
1932
1933 def _dbg(self, level, msg):
1934 """Write debugging output to sys.stderr.
1935 """
1936 if level <= self.debug:
1937 print >> sys.stderr, msg
1938# class TarFile
1939
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile, starting at its
           first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not fully
           loaded the member comes from its next() method, and the
           TarFile is flagged as _loaded when that runs dry.
           Afterwards the member is taken from the TarFile's
           members list.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Hence the
        # own index into the member list.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
1975
1976# Helper classes for sparse file support
1977class _section:
1978 """Base class for _data and _hole.
1979 """
1980 def __init__(self, offset, size):
1981 self.offset = offset
1982 self.size = size
1983 def __contains__(self, offset):
1984 return self.offset <= offset < self.offset + self.size
1985
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it carries realpos, the third constructor
       argument.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1992
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section and exists to tell holes and data apart.
    """
1997
1998class _ringbuffer(list):
1999 """Ringbuffer class which increases performance
2000 over a regular list.
2001 """
2002 def __init__(self):
2003 self.idx = 0
2004 def find(self, offset):
2005 idx = self.idx
2006 while True:
2007 item = self[idx]
2008 if offset in item:
2009 break
2010 idx += 1
2011 if idx == len(self):
2012 idx = 0
2013 if idx == self.idx:
2014 # End of File
2015 return None
2016 self.idx = idx
2017 return item
2018
2019#---------------------------------------------
2020# zipfile compatible TarFile class
2021#---------------------------------------------
# Compression constants accepted by TarFileCompat. Per the trailing
# comments they correspond to zipfile's constants of the same meaning.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen (TAR_PLAIN) or
           TarFile.gzopen (TAR_GZIPPED). Any other compression value
           raises ValueError. In read mode every member gets the
           zipfile style attributes filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] != "r":
            return
        for member in self.tarfile.getmembers():
            member.filename = member.name
            member.file_size = member.size
            member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing of the archive using TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None; no check is performed.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called name.
        """
        member = self.tarfile.getmember(name)
        fileobj = self.tarfile.extractfile(member)
        return fileobj.read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive as a member described
           by zinfo, whose filename, file_size and date_time
           attributes are copied to name, size and mtime first.
        """
        # Prefer the C implementation where it is available.
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2068#class TarFileCompat
2069
2070#--------------------
2071# exported functions
2072#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again right away; a
       TarError during that attempt gives False. Other exceptions
       are not caught.
    """
    try:
        # open is this module's open (an alias of TarFile.open),
        # not the builtin.
        open(name).close()
    except TarError:
        return False
    return True
2083
# Module-level alias of TarFile.open. Within this module it replaces
# the builtin open(); is_tarfile() calls it under that name.
open = TarFile.open