#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
# MacOS 9 is not supported.  Pathname handling in this module assumes that
# swapping "/" for the local os.path.sep is enough to convert a pathname,
# which does not hold for the mac rooted:path:name versus
# :nonrooted:path:name syntax.
if sys.platform == 'mac':
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE         # length of records
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1        # maximum size of a file
                                    # (11 octal digits: 077777777777)

REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# file types that tarfile can cope with
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# file types that somehow represent regular files
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

#---------------------------------------------------------
# Bits used in the mode field.  Written in hex so the literals
# mean the same thing to every Python version; the traditional
# octal value is given in each comment.
#---------------------------------------------------------
S_IFLNK = 0xA000                    # 0120000  symbolic link
S_IFREG = 0x8000                    # 0100000  regular file
S_IFBLK = 0x6000                    # 0060000  block device
S_IFDIR = 0x4000                    # 0040000  directory
S_IFCHR = 0x2000                    # 0020000  character device
S_IFIFO = 0x1000                    # 0010000  fifo

TSUID = 0x800                       # 04000  set UID on execution
TSGID = 0x400                       # 02000  set GID on execution
TSVTX = 0x200                       # 01000  reserved

TUREAD = 0x100                      # 0400  read by owner
TUWRITE = 0x080                     # 0200  write by owner
TUEXEC = 0x040                      # 0100  execute/search by owner
TGREAD = 0x020                      # 0040  read by group
TGWRITE = 0x010                     # 0020  write by group
TGEXEC = 0x008                      # 0010  execute/search by group
TOREAD = 0x004                      # 0004  read by other
TOWRITE = 0x002                     # 0002  write by other
TOEXEC = 0x001                      # 0001  execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    The result is everything before the first NUL byte.  A buffer that
    contains no NUL at all is returned unchanged.
    """
    # Cut at the FIRST terminator: stripping only trailing NULs would keep
    # an embedded NUL and whatever bytes follow it.
    end = s.find(NUL)
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    The result is exactly `length` characters long: `s` is cut down to
    `length - 1` characters if necessary, padded with NULs and followed
    by the terminating NUL.
    """
    width = length - 1
    return s[:width].ljust(width, NUL) + NUL
144
def nti(s):
    """Convert a number field to a python number.

    There are two possible encodings for a number field, see itn():
    a string of octal digits, or (first byte 0200) a big-endian
    base-256 number in the remaining bytes.

    An octal field may be terminated and padded with any mix of NULs
    and blanks; a field holding no digits at all yields 0.  Raises
    ValueError if the octal part is not a valid number.
    """
    if s[0:1] != chr(0x80):
        # Octal encoding.  Keep only what precedes the first NUL and drop
        # the blank padding: int() rejects strings with an embedded NUL,
        # so a field such as "001750\0 " (digits, NUL, space) has to be
        # cleaned up before it is parsed.
        end = s.find(NUL)
        if end != -1:
            s = s[:end]
        n = int(s.strip() or "0", 8)
    else:
        # GNU base-256 encoding: skip the marker byte, then accumulate
        # the remaining bytes most significant first.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
    return n
158
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field.

    POSIX 1003.1-1988 requires numbers to be encoded as a string of
    octal digits followed by a null-byte, which allows values up to
    (8**(digits-1))-1.  GNU tar can store larger numbers: a leading
    0200 byte marks that encoding and the following digits-1 bytes
    hold a big-endian representation, allowing values up to
    (256**(digits-1))-1.

    With posix=True a value that does not fit the octal form raises
    ValueError instead of falling back to the GNU encoding.
    """
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, add the marker byte
    # and flip the whole sequence into big-endian order.
    octets = []
    for _ in range(width):
        octets.append(chr(n & 0xFF))
        n >>= 8
    octets.append(chr(0x80))
    octets.reverse()
    return "".join(octets)
185
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    header = buf[:512]
    # "8x" skips the chksum field (bytes 148..155); the constant 256 is
    # the value of the eight blanks that stand in for it.
    unsigned_sum = sum(struct.unpack("148B8x356B", header))
    signed_sum = sum(struct.unpack("148b8x356b", header))
    return 256 + unsigned_sum, 256 + signed_sum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000198
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError("end of file reached") if src runs out of data
       before length bytes have been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly `count` bytes; a short read means premature EOF.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_chunks, tail = divmod(length, BUFSIZE)
    done = 0
    while done < full_chunks:
        transfer(BUFSIZE)
        done += 1

    if tail != 0:
        transfer(tail)
    return
223
# One inner tuple per output character of filemode().  The (bits, char)
# pairs of a group are tried in order and the first pair whose bits are
# all set wins, so combined masks must precede the masks they contain.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def symbol(group):
        # First entry whose bits are all present in mode, else "-".
        for bits, char in group:
            if mode & bits == bits:
                return char
        return "-"

    return "".join([symbol(group) for group in filemode_table])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and use "/" as the separator in the result."""
        return os.path.normpath(path).replace(os.sep, "/")
270
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
286
287#---------------------------
288# internal stream interface
289#---------------------------
290class _LowLevelFile:
291 """Low-level file object. Supports reading and writing.
292 It is used instead of a regular file object for streaming
293 access.
294 """
295
296 def __init__(self, name, mode):
297 mode = {
298 "r": os.O_RDONLY,
299 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
300 }[mode]
301 if hasattr(os, "O_BINARY"):
302 mode |= os.O_BINARY
303 self.fd = os.open(name, mode)
304
305 def close(self):
306 os.close(self.fd)
307
308 def read(self, size):
309 return os.read(self.fd, size)
310
311 def write(self, s):
312 os.write(self.fd, s)
313
314class _Stream:
315 """Class that serves as an adapter between TarFile and
316 a stream-like object. The stream-like object only
317 needs to have a read() or write() method and is accessed
318 blockwise. Use of gzip or bzip2 compression is possible.
319 A stream-like object could be for example: sys.stdin,
320 sys.stdout, a socket, a tape device etc.
321
322 _Stream is intended to be used only internally.
323 """
324
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000325 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000326 """Construct a _Stream object.
327 """
328 self._extfileobj = True
329 if fileobj is None:
330 fileobj = _LowLevelFile(name, mode)
331 self._extfileobj = False
332
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000333 if comptype == '*':
334 # Enable transparent compression detection for the
335 # stream interface
336 fileobj = _StreamProxy(fileobj)
337 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000338
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000339 self.name = name or ""
340 self.mode = mode
341 self.comptype = comptype
342 self.fileobj = fileobj
343 self.bufsize = bufsize
344 self.buf = ""
345 self.pos = 0L
346 self.closed = False
347
348 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000349 try:
350 import zlib
351 except ImportError:
352 raise CompressionError, "zlib module is not available"
353 self.zlib = zlib
354 self.crc = zlib.crc32("")
355 if mode == "r":
356 self._init_read_gz()
357 else:
358 self._init_write_gz()
359
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000360 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000361 try:
362 import bz2
363 except ImportError:
364 raise CompressionError, "bz2 module is not available"
365 if mode == "r":
366 self.dbuf = ""
367 self.cmp = bz2.BZ2Decompressor()
368 else:
369 self.cmp = bz2.BZ2Compressor()
370
371 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000372 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000373 self.close()
374
375 def _init_write_gz(self):
376 """Initialize for writing with gzip compression.
377 """
378 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
379 -self.zlib.MAX_WBITS,
380 self.zlib.DEF_MEM_LEVEL,
381 0)
382 timestamp = struct.pack("<L", long(time.time()))
383 self.__write("\037\213\010\010%s\002\377" % timestamp)
384 if self.name.endswith(".gz"):
385 self.name = self.name[:-3]
386 self.__write(self.name + NUL)
387
388 def write(self, s):
389 """Write string s to the stream.
390 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000391 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000392 self.crc = self.zlib.crc32(s, self.crc)
393 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000394 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000395 s = self.cmp.compress(s)
396 self.__write(s)
397
398 def __write(self, s):
399 """Write string s to the stream if a whole new block
400 is ready to be written.
401 """
402 self.buf += s
403 while len(self.buf) > self.bufsize:
404 self.fileobj.write(self.buf[:self.bufsize])
405 self.buf = self.buf[self.bufsize:]
406
407 def close(self):
408 """Close the _Stream object. No operation should be
409 done on it afterwards.
410 """
411 if self.closed:
412 return
413
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000414 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000415 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000416
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000417 if self.mode == "w" and self.buf:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000418 blocks, remainder = divmod(len(self.buf), self.bufsize)
419 if remainder > 0:
420 self.buf += NUL * (self.bufsize - remainder)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000421 self.fileobj.write(self.buf)
422 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000423 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000424 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000425 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 if not self._extfileobj:
428 self.fileobj.close()
429
430 self.closed = True
431
432 def _init_read_gz(self):
433 """Initialize for reading a gzip compressed fileobj.
434 """
435 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
436 self.dbuf = ""
437
438 # taken from gzip.GzipFile with some alterations
439 if self.__read(2) != "\037\213":
440 raise ReadError, "not a gzip file"
441 if self.__read(1) != "\010":
442 raise CompressionError, "unsupported compression method"
443
444 flag = ord(self.__read(1))
445 self.__read(6)
446
447 if flag & 4:
448 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
449 self.read(xlen)
450 if flag & 8:
451 while True:
452 s = self.__read(1)
453 if not s or s == NUL:
454 break
455 if flag & 16:
456 while True:
457 s = self.__read(1)
458 if not s or s == NUL:
459 break
460 if flag & 2:
461 self.__read(2)
462
463 def tell(self):
464 """Return the stream's file pointer position.
465 """
466 return self.pos
467
468 def seek(self, pos=0):
469 """Set the stream's file pointer to pos. Negative seeking
470 is forbidden.
471 """
472 if pos - self.pos >= 0:
473 blocks, remainder = divmod(pos - self.pos, self.bufsize)
474 for i in xrange(blocks):
475 self.read(self.bufsize)
476 self.read(remainder)
477 else:
478 raise StreamError, "seeking backwards is not allowed"
479 return self.pos
480
481 def read(self, size=None):
482 """Return the next size number of bytes from the stream.
483 If size is not defined, return all bytes of the stream
484 up to EOF.
485 """
486 if size is None:
487 t = []
488 while True:
489 buf = self._read(self.bufsize)
490 if not buf:
491 break
492 t.append(buf)
493 buf = "".join(t)
494 else:
495 buf = self._read(size)
496 self.pos += len(buf)
497 return buf
498
499 def _read(self, size):
500 """Return size bytes from the stream.
501 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000502 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503 return self.__read(size)
504
505 c = len(self.dbuf)
506 t = [self.dbuf]
507 while c < size:
508 buf = self.__read(self.bufsize)
509 if not buf:
510 break
511 buf = self.cmp.decompress(buf)
512 t.append(buf)
513 c += len(buf)
514 t = "".join(t)
515 self.dbuf = t[size:]
516 return t[:size]
517
518 def __read(self, size):
519 """Return size bytes from stream. If internal buffer is empty,
520 read another block from the stream.
521 """
522 c = len(self.buf)
523 t = [self.buf]
524 while c < size:
525 buf = self.fileobj.read(self.bufsize)
526 if not buf:
527 break
528 t.append(buf)
529 c += len(buf)
530 t = "".join(t)
531 self.buf = t[size:]
532 return t[:size]
533# class _Stream
534
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000535class _StreamProxy(object):
536 """Small proxy class that enables transparent compression
537 detection for the Stream interface (mode 'r|*').
538 """
539
540 def __init__(self, fileobj):
541 self.fileobj = fileobj
542 self.buf = self.fileobj.read(BLOCKSIZE)
543
544 def read(self, size):
545 self.read = self.fileobj.read
546 return self.buf
547
548 def getcomptype(self):
549 if self.buf.startswith("\037\213\010"):
550 return "gz"
551 if self.buf.startswith("BZh91"):
552 return "bz2"
553 return "tar"
554
555 def close(self):
556 self.fileobj.close()
557# class StreamProxy
558
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000559class _BZ2Proxy(object):
560 """Small proxy class that enables external file object
561 support for "r:bz2" and "w:bz2" modes. This is actually
562 a workaround for a limitation in bz2 module's BZ2File
563 class which (unlike gzip.GzipFile) has no support for
564 a file object argument.
565 """
566
567 blocksize = 16 * 1024
568
569 def __init__(self, fileobj, mode):
570 self.fileobj = fileobj
571 self.mode = mode
572 self.init()
573
574 def init(self):
575 import bz2
576 self.pos = 0
577 if self.mode == "r":
578 self.bz2obj = bz2.BZ2Decompressor()
579 self.fileobj.seek(0)
580 self.buf = ""
581 else:
582 self.bz2obj = bz2.BZ2Compressor()
583
584 def read(self, size):
585 b = [self.buf]
586 x = len(self.buf)
587 while x < size:
588 try:
589 raw = self.fileobj.read(self.blocksize)
590 data = self.bz2obj.decompress(raw)
591 b.append(data)
592 except EOFError:
593 break
594 x += len(data)
595 self.buf = "".join(b)
596
597 buf = self.buf[:size]
598 self.buf = self.buf[size:]
599 self.pos += len(buf)
600 return buf
601
602 def seek(self, pos):
603 if pos < self.pos:
604 self.init()
605 self.read(pos - self.pos)
606
607 def tell(self):
608 return self.pos
609
610 def write(self, data):
611 self.pos += len(data)
612 raw = self.bz2obj.compress(data)
613 self.fileobj.write(raw)
614
615 def close(self):
616 if self.mode == "w":
617 raw = self.bz2obj.flush()
618 self.fileobj.write(raw)
619 self.fileobj.close()
620# class _BZ2Proxy
621
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000622#------------------------
623# Extraction file object
624#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        # read() is bound per instance to the matching implementation.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           A complete line is returned with trailing carriage
           returns removed and a single newline appended.  If the
           size limit or the end of the data is reached first, the
           text gathered so far is returned without a newline.
        """
        unlimited = size < 0

        nl = self.linebuffer.find("\n")
        if nl < 0:
            # No complete line buffered yet: fetch more data in small
            # pieces until a newline shows up, the size budget is used
            # up or the member is exhausted.
            budget = size - len(self.linebuffer)
            while nl < 0 and (unlimited or budget > 0):
                if unlimited:
                    want = 100
                else:
                    want = min(budget, 100)
                buf = self.read(want)
                if not buf:
                    break
                self.linebuffer += buf
                budget -= len(buf)
                nl = self.linebuffer.find("\n")

        if nl == -1:
            s = self.linebuffer
            self.linebuffer = ""
            return s

        if not unlimited and nl > size:
            # The limit is hit before the buffered newline.  Return just
            # the first size characters and keep everything else, so no
            # character is dropped and no newline is invented.
            s = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.
        """
        if self.closed:
            raise ValueError("file is closed")
        # The underlying file object is shared, so reposition it first.
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # section backed by data stored in the archive
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # any other section reads as NULs
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (absolute), 1 (relative to the current
           position) or 2 (relative to the end).  The resulting
           position is clamped to the range 0..size.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
781
782#------------------
783# Exported Classes
784#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name (dirnames must end with '/')
        self.mode = 0x1B6       # file permissions (0666 octal)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number
        self.prefix = ""        # prefix to filename or information
                                # about sparse files

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.name, id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises ValueError if buf does not have the size of one
           block, consists of NULs only, or carries a checksum that
           matches neither the unsigned nor the signed sum.
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        # kept raw (not NUL-stripped), unlike the other string fields
        tarinfo.prefix = buf[345:500]

        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header block as a 512 byte string.

           posix is handed to itn() for every number field.  The
           block is also stored in self.buf.
        """
        parts = [
            stn(self.name, 100),
            # only the low 12 bits (07777 octal) of the mode are stored
            itn(self.mode & 0xFFF, 8, posix),
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            # Checksum field.  It must occupy its full 8 bytes -- a
            # shorter placeholder shifts every following field -- and it
            # is blank because the checksum is computed over blanks.
            " " * 8,
            self.type,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(self.prefix, 155),
        ]

        # fill the header up to a whole block with NULs
        buf = "".join(parts)[:BLOCKSIZE].ljust(BLOCKSIZE, NUL)
        chksum = calc_chksums(buf)[0]
        # six octal digits plus NUL replace the first seven placeholder
        # bytes; the eighth stays blank
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        self.buf = buf
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES

    def isfile(self):
        return self.isreg()

    def isdir(self):
        return self.type == DIRTYPE

    def issym(self):
        return self.type == SYMTYPE

    def islnk(self):
        return self.type == LNKTYPE

    def ischr(self):
        return self.type == CHRTYPE

    def isblk(self):
        return self.type == BLKTYPE

    def isfifo(self):
        return self.type == FIFOTYPE

    def issparse(self):
        return self.type == GNUTYPE_SPARSE

    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
899
900class TarFile(object):
901 """The TarFile Class provides an interface to tar archives.
902 """
903
904 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
905
906 dereference = False # If true, add content of linked file to the
907 # tar file, else the link.
908
909 ignore_zeros = False # If true, skips empty or invalid blocks and
910 # continues processing.
911
912 errorlevel = 0 # If 0, fatal errors only appear in debug
913 # messages (if debug >= 0). If > 0, errors
914 # are passed to the caller as exceptions.
915
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000916 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000917 # archives (no GNU extensions!)
918
919 fileobject = ExFileObject
920
921 def __init__(self, name=None, mode="r", fileobj=None):
922 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
923 read from an existing archive, 'a' to append data to an existing
924 file or 'w' to create a new file overwriting an existing one. `mode'
925 defaults to 'r'.
926 If `fileobj' is given, it is used for reading or writing data. If it
927 can be determined, `mode' is overridden by `fileobj's mode.
928 `fileobj' is not closed, when TarFile is closed.
929 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000930 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000931
932 if len(mode) > 1 or mode not in "raw":
933 raise ValueError, "mode must be 'r', 'a' or 'w'"
934 self._mode = mode
935 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
936
937 if not fileobj:
938 fileobj = file(self.name, self.mode)
939 self._extfileobj = False
940 else:
941 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000942 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000943 if hasattr(fileobj, "mode"):
944 self.mode = fileobj.mode
945 self._extfileobj = True
946 self.fileobj = fileobj
947
948 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +0000949 self.closed = False
950 self.members = [] # list of members as TarInfo objects
951 self._loaded = False # flag if all members have been read
952 self.offset = 0L # current position in the archive file
953 self.inodes = {} # dictionary caching the inodes of
954 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000955
956 if self._mode == "r":
957 self.firstmember = None
958 self.firstmember = self.next()
959
960 if self._mode == "a":
961 # Move to the end of the archive,
962 # before the first empty block.
963 self.firstmember = None
964 while True:
965 try:
966 tarinfo = self.next()
967 except ReadError:
968 self.fileobj.seek(0)
969 break
970 if tarinfo is None:
971 self.fileobj.seek(- BLOCKSIZE, 1)
972 break
973
974 if self._mode in "aw":
975 self._loaded = True
976
977 #--------------------------------------------------------------------------
978 # Below are the classmethods which act as alternate constructors to the
979 # TarFile class. The open() method is the only one that is needed for
980 # public use; it is the "super"-constructor and is able to select an
981 # adequate "sub"-constructor for a particular compression using the mapping
982 # from OPEN_METH.
983 #
984 # This concept allows one to subclass TarFile without losing the comfort of
985 # the super-constructor. A sub-constructor is registered and made available
986 # by adding it to the mapping in OPEN_METH.
987
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered *open() method
       is able to read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file:
        # simply try every registered method until one succeeds.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against a tuple: a substring test would also let
        # through values such as "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream)
        except:
            # Do not leak the stream when the archive cannot be set up;
            # the original exception is re-raised unchanged.
            stream.close()
            raise
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
1055
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else raises
       ValueError. The archive object is created by calling the class
       with (name, mode, fileobj).
    """
    # A tuple comparison is used on purpose: the substring test
    # `mode not in "raw"' would accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1063
Guido van Rossum75b64e62005-01-16 00:16:11 +00001064 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001065 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
1066 """Open gzip compressed tar archive name for reading or writing.
1067 Appending is not allowed.
1068 """
1069 if len(mode) > 1 or mode not in "rw":
1070 raise ValueError, "mode must be 'r' or 'w'"
1071
1072 try:
1073 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001074 gzip.GzipFile
1075 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001076 raise CompressionError, "gzip module is not available"
1077
1078 pre, ext = os.path.splitext(name)
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001079 pre = os.path.basename(pre)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001080 if ext == ".tgz":
1081 ext = ".tar"
1082 if ext == ".gz":
1083 ext = ""
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001084 tarname = pre + ext
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001085
1086 if fileobj is None:
1087 fileobj = file(name, mode + "b")
1088
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001089 if mode != "r":
1090 name = tarname
1091
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001092 try:
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001093 t = cls.taropen(tarname, mode,
1094 gzip.GzipFile(name, mode, compresslevel, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001095 )
1096 except IOError:
1097 raise ReadError, "not a gzip file"
1098 t._extfileobj = False
1099 return t
1100
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None when `fileobj' is given. Raises ValueError
       for a mode other than 'r' or 'w', CompressionError if the bz2
       module cannot be imported and ReadError if the data is not
       bzip2 compressed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the tar archive inside the bzip2 container.
    # There is nothing to derive when only a file object was passed.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        try:
            t = cls.taropen(tarname, mode, fileobj)
        except IOError:
            raise ReadError("not a bzip2 file")
    except:
        # Only close the BZ2File created above; a proxy wraps a file
        # object that still belongs to the caller.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1133
# All *open() methods are registered here.
# Maps a compression type name (the part after ":" in the mode string
# passed to open()) to the name of the classmethod that opens archives
# of that type. open() looks the method up with getattr() and, for
# transparent reading, simply tries every entry of this mapping.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1140
1141 #--------------------------------------------------------------------------
1142 # The public methods which TarFile provides:
1143
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed only if it was not supplied
       by the caller, and it is closed even if writing the trailing
       blocks fails.
    """
    if self.closed:
        return

    # Mark the archive closed up front so that a failing write below
    # cannot lead to the trailer being written a second time.
    self.closed = True
    try:
        if self._mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1163
def getmember(self, name):
    """Look up the archive member called `name' and return its TarInfo
       object. KeyError is raised when the archive has no such member.
       If a member occurs more than once in the archive, its last
       occurrence is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001174
def getmembers(self):
    """Return all archive members as a list of TarInfo objects, in the
       order in which they appear in the archive.
    """
    self._check()
    # A complete member list requires that the whole archive has
    # been scanned once.
    fully_scanned = self._loaded
    if not fully_scanned:
        self._load()
    return self.members
1184
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001190
1191 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1192 """Create a TarInfo object for either the file `name' or the file
1193 object `fileobj' (using os.fstat on its file descriptor). You can
1194 modify some of the TarInfo's attributes before you add it using
1195 addfile(). If given, `arcname' specifies an alternative name for the
1196 file in the archive.
1197 """
1198 self._check("aw")
1199
1200 # When fileobj is given, replace name by
1201 # fileobj's real name.
1202 if fileobj is not None:
1203 name = fileobj.name
1204
1205 # Building the name of the member in the archive.
1206 # Backward slashes are converted to forward slashes,
1207 # Absolute paths are turned to relative paths.
1208 if arcname is None:
1209 arcname = name
1210 arcname = normpath(arcname)
1211 drv, arcname = os.path.splitdrive(arcname)
1212 while arcname[0:1] == "/":
1213 arcname = arcname[1:]
1214
1215 # Now, fill the TarInfo object with
1216 # information specific for the file.
1217 tarinfo = TarInfo()
1218
1219 # Use os.stat or os.lstat, depending on platform
1220 # and if symlinks shall be resolved.
1221 if fileobj is None:
1222 if hasattr(os, "lstat") and not self.dereference:
1223 statres = os.lstat(name)
1224 else:
1225 statres = os.stat(name)
1226 else:
1227 statres = os.fstat(fileobj.fileno())
1228 linkname = ""
1229
1230 stmd = statres.st_mode
1231 if stat.S_ISREG(stmd):
1232 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001233 if not self.dereference and \
1234 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001235 # Is it a hardlink to an already
1236 # archived file?
1237 type = LNKTYPE
1238 linkname = self.inodes[inode]
1239 else:
1240 # The inode is added only if its valid.
1241 # For win32 it is always 0.
1242 type = REGTYPE
1243 if inode[0]:
1244 self.inodes[inode] = arcname
1245 elif stat.S_ISDIR(stmd):
1246 type = DIRTYPE
1247 if arcname[-1:] != "/":
1248 arcname += "/"
1249 elif stat.S_ISFIFO(stmd):
1250 type = FIFOTYPE
1251 elif stat.S_ISLNK(stmd):
1252 type = SYMTYPE
1253 linkname = os.readlink(name)
1254 elif stat.S_ISCHR(stmd):
1255 type = CHRTYPE
1256 elif stat.S_ISBLK(stmd):
1257 type = BLKTYPE
1258 else:
1259 return None
1260
1261 # Fill the TarInfo object with all
1262 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001263 tarinfo.name = arcname
1264 tarinfo.mode = stmd
1265 tarinfo.uid = statres.st_uid
1266 tarinfo.gid = statres.st_gid
1267 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001268 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001269 else:
1270 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001271 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001272 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001273 tarinfo.linkname = linkname
1274 if pwd:
1275 try:
1276 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1277 except KeyError:
1278 pass
1279 if grp:
1280 try:
1281 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1282 except KeyError:
1283 pass
1284
1285 if type in (CHRTYPE, BLKTYPE):
1286 if hasattr(os, "major") and hasattr(os, "minor"):
1287 tarinfo.devmajor = os.major(statres.st_rdev)
1288 tarinfo.devminor = os.minor(statres.st_rdev)
1289 return tarinfo
1290
1291 def list(self, verbose=True):
1292 """Print a table of contents to sys.stdout. If `verbose' is False, only
1293 the names of the members are printed. If it is True, an `ls -l'-like
1294 output is produced.
1295 """
1296 self._check()
1297
1298 for tarinfo in self:
1299 if verbose:
1300 print filemode(tarinfo.mode),
1301 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1302 tarinfo.gname or tarinfo.gid),
1303 if tarinfo.ischr() or tarinfo.isblk():
1304 print "%10s" % ("%d,%d" \
1305 % (tarinfo.devmajor, tarinfo.devminor)),
1306 else:
1307 print "%10d" % tarinfo.size,
1308 print "%d-%02d-%02d %02d:%02d:%02d" \
1309 % time.localtime(tarinfo.mtime)[:6],
1310
1311 print tarinfo.name,
1312
1313 if verbose:
1314 if tarinfo.issym():
1315 print "->", tarinfo.linkname,
1316 if tarinfo.islnk():
1317 print "link to", tarinfo.linkname,
1318 print
1319
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of unsupported types are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the source file even if writing the member fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1371
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive. When
       `fileobj' is given, tarinfo.size bytes are read from it and
       stored as the member's data. TarInfo objects can be created
       with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    # Members that are too large are refused in posix mode; otherwise
    # they are only reported.
    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    # Overlong link names: error in posix mode, GNU extension otherwise.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    # Overlong member names: split into prefix and name in posix
    # mode, GNU extension otherwise.
    if len(tarinfo.name) > LENGTH_NAME:
        if not self.posix:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
        else:
            # Cut directly behind the last slash found within the
            # first LENGTH_PREFIX + 1 characters.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            split_at = head.rfind("/") + 1
            prefix = head[:split_at]

            name = tarinfo.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix

    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad it with NULs
    # up to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, leftover = divmod(tarinfo.size, BLOCKSIZE)
        if leftover > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001438
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    directories = []

    # Permissive mode (0777) used for directories while extracting.
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                # Deliberately best-effort, e.g. the directory may
                # already exist.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable: overwriting `path' here would make
        # every following directory resolve below the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1479
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001480 def extract(self, member, path=""):
1481 """Extract a member from the archive to the current working directory,
1482 using its full name. Its file information is extracted as accurately
1483 as possible. `member' may be a filename or a TarInfo object. You can
1484 specify a different directory using `path'.
1485 """
1486 self._check("r")
1487
1488 if isinstance(member, TarInfo):
1489 tarinfo = member
1490 else:
1491 tarinfo = self.getmember(member)
1492
Neal Norwitza4f651a2004-07-20 22:07:44 +00001493 # Prepare the link target for makelink().
1494 if tarinfo.islnk():
1495 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1496
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497 try:
1498 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1499 except EnvironmentError, e:
1500 if self.errorlevel > 0:
1501 raise
1502 else:
1503 if e.filename is None:
1504 self._dbg(1, "tarfile: %s" % e.strerror)
1505 else:
1506 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1507 except ExtractError, e:
1508 if self.errorlevel > 1:
1509 raise
1510 else:
1511 self._dbg(1, "tarfile: %s" % e)
1512
def extractfile(self, member):
    """Return a file object for an archive member. `member' is either
       a filename or a TarInfo object. A regular file yields a
       file-like object, a link yields a file-like object built from
       the link's target, and any other member yields None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type (which are treated
    # like regular files), carry data of their own.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1551
1552 def _extract_member(self, tarinfo, targetpath):
1553 """Extract the TarInfo object tarinfo to a physical
1554 file called targetpath.
1555 """
1556 # Fetch the TarInfo object for the given name
1557 # and build the destination pathname, replacing
1558 # forward slashes to platform specific separators.
1559 if targetpath[-1:] == "/":
1560 targetpath = targetpath[:-1]
1561 targetpath = os.path.normpath(targetpath)
1562
1563 # Create all upper directories.
1564 upperdirs = os.path.dirname(targetpath)
1565 if upperdirs and not os.path.exists(upperdirs):
1566 ti = TarInfo()
1567 ti.name = upperdirs
1568 ti.type = DIRTYPE
1569 ti.mode = 0777
1570 ti.mtime = tarinfo.mtime
1571 ti.uid = tarinfo.uid
1572 ti.gid = tarinfo.gid
1573 ti.uname = tarinfo.uname
1574 ti.gname = tarinfo.gname
1575 try:
1576 self._extract_member(ti, ti.name)
1577 except:
1578 pass
1579
1580 if tarinfo.islnk() or tarinfo.issym():
1581 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1582 else:
1583 self._dbg(1, tarinfo.name)
1584
1585 if tarinfo.isreg():
1586 self.makefile(tarinfo, targetpath)
1587 elif tarinfo.isdir():
1588 self.makedir(tarinfo, targetpath)
1589 elif tarinfo.isfifo():
1590 self.makefifo(tarinfo, targetpath)
1591 elif tarinfo.ischr() or tarinfo.isblk():
1592 self.makedev(tarinfo, targetpath)
1593 elif tarinfo.islnk() or tarinfo.issym():
1594 self.makelink(tarinfo, targetpath)
1595 elif tarinfo.type not in SUPPORTED_TYPES:
1596 self.makeunknown(tarinfo, targetpath)
1597 else:
1598 self.makefile(tarinfo, targetpath)
1599
1600 self.chown(tarinfo, targetpath)
1601 if not tarinfo.issym():
1602 self.chmod(tarinfo, targetpath)
1603 self.utime(tarinfo, targetpath)
1604
1605 #--------------------------------------------------------------------------
1606 # Below are the different file methods. They are called via
1607 # _extract_member() when extract() is called. They can be replaced in a
1608 # subclass to implement other functionality.
1609
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. A directory that already
       exists is not treated as an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Everything but "already exists" is passed on to the caller.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1618
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through extractfile() and copied
       into a newly created file. Both file objects are closed even
       if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1627
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath by treating it
       like a regular file, and report that through a debug message.
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file." \
           % tarinfo.type
    self._dbg(1, note)
1635
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. ExtractError is raised on
       systems where os.mkfifo is not available.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1643
def makedev(self, tarinfo, targetpath):
    """Create the character or block device described by tarinfo at
       targetpath. ExtractError is raised on systems that lack
       os.mknod or os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    mode = tarinfo.mode
    if tarinfo.isblk():
        devflag = stat.S_IFBLK
    else:
        devflag = stat.S_IFCHR
    mode |= devflag

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1658
def makelink(self, tarinfo, targetpath):
    """Create the (symbolic) link targetpath. Where links cannot be
       created (platform limitation), a copy of the referenced file
       is made instead.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Fall through to the copy-based emulation below.
        pass
    else:
        return

    if tarinfo.issym():
        # The target of a symbolic link is resolved relative to the
        # directory of the link itself.
        linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                linkpath)
        linkpath = normpath(linkpath)

    try:
        # First choice: extract the link target from the archive.
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        # Second choice: copy the file from the filesystem.
        linkpath = os.path.normpath(linkpath)
        try:
            shutil.copy2(linkpath, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1685
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo.
       Nothing is done unless the process runs with effective uid 0.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name first, then by id, and finally fall
    # back to the group of the current process.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Same lookup order for the user.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1713
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Does nothing where os.chmod is unavailable; a failing chmod is
       reported as ExtractError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001722
def utime(self, tarinfo, targetpath):
    """Apply the modification time stored in tarinfo to targetpath.
       Does nothing where os.utime is unavailable; a failing utime is
       reported as ExtractError.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    skip_directory = sys.platform == "win32" and tarinfo.isdir()
    if skip_directory:
        return
    try:
        # Access and modification time are both set to mtime.
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1736
1737 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001738 def next(self):
1739 """Return the next member of the archive as a TarInfo object, when
1740 TarFile is opened for reading. Return None if there is no more
1741 available.
1742 """
1743 self._check("ra")
1744 if self.firstmember is not None:
1745 m = self.firstmember
1746 self.firstmember = None
1747 return m
1748
1749 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001750 self.fileobj.seek(self.offset)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001751 while True:
1752 buf = self.fileobj.read(BLOCKSIZE)
1753 if not buf:
1754 return None
Georg Brandl38c6a222006-05-10 16:26:03 +00001755
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001756 try:
1757 tarinfo = TarInfo.frombuf(buf)
Georg Brandl38c6a222006-05-10 16:26:03 +00001758
1759 # Set the TarInfo object's offset to the current position of the
1760 # TarFile and set self.offset to the position where the data blocks
1761 # should begin.
1762 tarinfo.offset = self.offset
1763 self.offset += BLOCKSIZE
1764
1765 tarinfo = self.proc_member(tarinfo)
1766
1767 except ValueError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001768 if self.ignore_zeros:
Georg Brandl38c6a222006-05-10 16:26:03 +00001769 self._dbg(2, "0x%X: %s" % (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001770 self.offset += BLOCKSIZE
1771 continue
1772 else:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001773 if self.offset == 0:
Georg Brandl38c6a222006-05-10 16:26:03 +00001774 raise ReadError, str(e)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001775 return None
1776 break
1777
Georg Brandl38c6a222006-05-10 16:26:03 +00001778 # Some old tar programs represent a directory as a regular
1779 # file with a trailing slash.
1780 if tarinfo.isreg() and tarinfo.name.endswith("/"):
1781 tarinfo.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001782
Georg Brandl38c6a222006-05-10 16:26:03 +00001783 # The prefix field is used for filenames > 100 in
1784 # the POSIX standard.
1785 # name = prefix + '/' + name
1786 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001787
Georg Brandl38c6a222006-05-10 16:26:03 +00001788 # Directory names should have a '/' at the end.
1789 if tarinfo.isdir():
1790 tarinfo.name += "/"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001791
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001792 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001793 return tarinfo
1794
1795 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001796 # The following are methods that are called depending on the type of a
1797 # member. The entry point is proc_member() which is called with a TarInfo
1798 # object created from the header block from the current offset. The
1799 # proc_member() method can be overridden in a subclass to add custom
1800 # proc_*() methods. A proc_*() method MUST implement the following
1801 # operations:
1802 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1803 # if there is data that follows.
1804 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001805 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001806 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Pick the proc_*() method that is responsible for the type of
       tarinfo, call it and return its result.
    """
    if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif tarinfo.type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1817
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type, which
       is treated like a regular file. Returns tarinfo.
    """
    tarinfo.offset_data = self.offset
    carries_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if carries_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001827
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member: read the blocks that
       hold the long string, process the header that follows and
       return that member with the long name or linkname filled in.
    """
    # Collect the data blocks that carry the long name.
    pieces = []
    remaining = tarinfo.size
    while remaining > 0:
        pieces.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longdata = "".join(pieces)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(longdata)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(longdata)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001856
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a _ringbuffer of _data and _hole sections from the
       sparse structs found in tarinfo.buf and in any extended header
       blocks that follow, stores it in tarinfo.sparse, records where
       the archived data starts, advances self.offset past that data
       and replaces tarinfo.size with the original file size read
       from the header. Returns tarinfo.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    # Byte position of the first sparse struct inside the header block.
    pos = 386
    # lastpos: end (offset + numbytes) of the last data section seen.
    # realpos: running total of numbytes, passed to _data() as the
    # section's position -- presumably within the archived data
    # (TODO confirm against the reader of tarinfo.sparse).
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        try:
            # Each struct is 24 bytes: a 12 byte offset field
            # followed by a 12 byte numbytes field.
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            # A field that nti() rejects ends the list of structs.
            break
        if offset > lastpos:
            # The gap between the previous data section and this
            # one is a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    # Flag byte and original size field of the first header.
    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extended header block holds up to 21 structs of
        # 24 bytes each, starting at the beginning of the block.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # The flag byte at position 504 (directly after the 21
        # structs) tells whether yet another extended block follows.
        isextended = ord(buf[504])

    if lastpos < origsize:
        # The file ends with a hole.
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    # tarinfo.size still holds the size taken from the header here;
    # it is used to skip the data blocks before being overwritten
    # with the original size.
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    # Clear the prefix field so that it is not used
    # as a pathname in next().
    tarinfo.prefix = ""

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001917
1918 #--------------------------------------------------------------------------
1919 # Little helper methods:
1920
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    full_blocks, leftover = divmod(count, BLOCKSIZE)
    if not leftover:
        return full_blocks * BLOCKSIZE
    # A partially filled block still occupies a whole block.
    return (full_blocks + 1) * BLOCKSIZE
1929
1930 def _getmember(self, name, tarinfo=None):
1931 """Find an archive member by name from bottom to top.
1932 If tarinfo is given, it is used as the starting point.
1933 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001934 # Ensure that all members have been loaded.
1935 members = self.getmembers()
1936
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001937 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001938 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001939 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001940 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001941
1942 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001943 if name == members[i].name:
1944 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001945
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001946 def _load(self):
1947 """Read through the entire archive file and look for readable
1948 members.
1949 """
1950 while True:
1951 tarinfo = self.next()
1952 if tarinfo is None:
1953 break
1954 self._loaded = True
1955
1956 def _check(self, mode=None):
1957 """Check if TarFile is still open, and if the operation's mode
1958 corresponds to TarFile's mode.
1959 """
1960 if self.closed:
1961 raise IOError, "%s is closed" % self.__class__.__name__
1962 if mode is not None and self._mode not in mode:
1963 raise IOError, "bad operation for mode %r" % self._mode
1964
1965 def __iter__(self):
1966 """Provide an iterator object.
1967 """
1968 if self._loaded:
1969 return iter(self.members)
1970 else:
1971 return TarIter(self)
1972
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       The member is an extended tar header whose size field is
       the length of the longname, followed by data blocks that
       hold the longname as a null terminated string.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks, padding the last one with NULs
    self.fileobj.write(payload)
    nblocks, leftover = divmod(header.size, BLOCKSIZE)
    if leftover > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - leftover))
        nblocks += 1
    self.offset += nblocks * BLOCKSIZE
1997
1998 def _dbg(self, level, msg):
1999 """Write debugging output to sys.stderr.
2000 """
2001 if level <= self.debug:
2002 print >> sys.stderr, msg
2003# class TarFile
2004
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create a TarIter object for tarfile.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not yet
           loaded its next() method is used; once it reports the
           end of the archive the TarFile is marked as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        # That is why an already loaded TarFile is served from
        # its members list by index.
        archive = self.tarfile
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2040
2041# Helper classes for sparse file support
2042class _section:
2043 """Base class for _data and _hole.
2044 """
2045 def __init__(self, offset, size):
2046 self.offset = offset
2047 self.size = size
2048 def __contains__(self, offset):
2049 return self.offset <= offset < self.offset + self.size
2050
class _data(_section):
    """A section of a sparse file that holds data. In addition
       to offset and size it stores realpos.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2057
class _hole(_section):
    """A section of a sparse file that is a hole. It adds
       nothing to _section and only serves as a marker type.
    """
2062
2063class _ringbuffer(list):
2064 """Ringbuffer class which increases performance
2065 over a regular list.
2066 """
2067 def __init__(self):
2068 self.idx = 0
2069 def find(self, offset):
2070 idx = self.idx
2071 while True:
2072 item = self[idx]
2073 if offset in item:
2074 break
2075 idx += 1
2076 if idx == len(self):
2077 idx = 0
2078 if idx == self.idx:
2079 # End of File
2080 return None
2081 self.idx = idx
2082 return item
2083
2084#---------------------------------------------
2085# zipfile compatible TarFile class
2086#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Pick the TarFile constructor that matches the
        # compression constant.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names that
            # zipfile-style code expects.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [member.name for member in self.infolist()]

    def infolist(self):
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        # Nothing is checked; present for interface compatibility.
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted but not used.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Translate the zipfile-style attributes back to the
        # names TarFile.addfile() works with.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        payload = StringIO(bytes)
        self.tarfile.addfile(zinfo, payload)

    def close(self):
        self.tarfile.close()
2133#class TarFileCompat
2134
2135#--------------------
2136# exported functions
2137#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) as a
       tar archive without a TarError, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2148
# Module-level shortcut for TarFile.open. This rebinds the name
# "open" in this module's namespace, so the open(name) call in
# is_tarfile() resolves to TarFile.open and not to the builtin.
open = TarFile.open