blob: 9f42a377f88f9510e395a4d8ec7beef12110f240 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
if sys.platform == 'mac':
    # MacOS 9 is not supported.  In many places this module assumes that a
    # simple substitution of "/" by the local os.path.sep is good enough to
    # convert pathnames, which does not work with the mac rooted:path:name
    # versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE         # length of records
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1        # maximum size of a file (11 octal digits)

# Type flags as stored in the header's one-byte type field.
REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)
107
#---------------------------------------------------------
# Bits used in the mode field.  The customary octal value
# of each bit is given in brackets.
#---------------------------------------------------------
S_IFLNK = 0xA000        # symbolic link                 [0120000]
S_IFREG = 0x8000        # regular file                  [0100000]
S_IFBLK = 0x6000        # block device                  [0060000]
S_IFDIR = 0x4000        # directory                     [0040000]
S_IFCHR = 0x2000        # character device              [0020000]
S_IFIFO = 0x1000        # fifo                          [0010000]

TSUID = 0x800           # set UID on execution          [04000]
TSGID = 0x400           # set GID on execution          [02000]
TSVTX = 0x200           # reserved                      [01000]

TUREAD = 0x100          # read by owner                 [0400]
TUWRITE = 0x080         # write by owner                [0200]
TUEXEC = 0x040          # execute/search by owner       [0100]
TGREAD = 0x020          # read by group                 [0040]
TGWRITE = 0x010         # write by group                [0020]
TGEXEC = 0x008          # execute/search by group       [0010]
TOREAD = 0x004          # read by other                 [0004]
TOWRITE = 0x002         # write by other                [0002]
TOEXEC = 0x001          # execute/search by other       [0001]
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Return the python string held in the null-terminated buffer s,
       i.e. s with all trailing null characters removed.
    """
    stripped = s.rstrip(NUL)
    return stripped
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def stn(s, length):
    """Return s as a null-terminated string buffer of exactly length
       bytes: s is cut or null-padded to length - 1 bytes and a
       terminating null character is appended.
    """
    field = "%ds" % (length - 1)
    return struct.pack(field, s) + NUL
144
def nti(s):
    """Return the python number stored in the number field s.
    """
    # A number field uses one of the two encodings written by itn().
    if s[0] == chr(0x80):
        # Leading 0200 byte: the remaining bytes are a big-endian
        # binary number.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # Otherwise: octal digits followed by null padding; an all-null
    # field counts as zero.
    return int(s.rstrip(NUL) or "0", 8)
158
def itn(n, digits=8, posix=False):
    """Return the python number n encoded as a number field that is
       digits bytes wide.  Raise ValueError if posix is true and n
       cannot be stored as octal digits.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Emit the low-order byte last (big-endian).
    octets = []
    for i in range(width):
        octets.insert(0, chr(n & 0xFF))
        n >>= 8
    return chr(0x80) + "".join(octets)
185
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the member
       header in buf.  A checksum is the sum of all header bytes, with
       the eight bytes of the chksum field (offsets 148 to 155) counted
       as if they were spaces.  According to the GNU tar sources, some
       tars (Sun and NeXT) sum signed chars, which gives a different
       result as soon as a byte has its high bit set; hence both
       variants are calculated.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself; the constant 256 accounts for
    # it being treated as eight spaces (8 * 0x20).
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000198
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.  Raise IOError
       if src is exhausted before length bytes were copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(count):
        # Move exactly count bytes, a short read means premature EOF.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)

    done = 0
    while done < blocks:
        transfer(BUFSIZE)
        done += 1

    if remainder != 0:
        transfer(remainder)
    return
223
# One inner tuple per character of the string built by filemode().
# Within an inner tuple the (bits, character) pairs are tried in
# order and the first pair whose bits are all set wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000250
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for table in filemode_table:
        # "-" is used when none of the table's bit patterns is set.
        char = "-"
        for bit, candidate in table:
            if mode & bit == bit:
                char = candidate
                break
        perm.append(char)
    return "".join(perm)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        # Normalize with the local rules, then switch to the forward
        # slashes used for member names.
        return os.path.normpath(path).replace(os.sep, "/")
270
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
286
287#---------------------------
288# internal stream interface
289#---------------------------
290class _LowLevelFile:
291 """Low-level file object. Supports reading and writing.
292 It is used instead of a regular file object for streaming
293 access.
294 """
295
296 def __init__(self, name, mode):
297 mode = {
298 "r": os.O_RDONLY,
299 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
300 }[mode]
301 if hasattr(os, "O_BINARY"):
302 mode |= os.O_BINARY
303 self.fd = os.open(name, mode)
304
305 def close(self):
306 os.close(self.fd)
307
308 def read(self, size):
309 return os.read(self.fd, size)
310
311 def write(self, s):
312 os.write(self.fd, s)
313
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name is the file to open when fileobj is None, mode is "r"
           or "w", comptype is "tar", "gz", "bz2" or "*" (detect the
           compression from the stream), and bufsize is the size of
           the blocks exchanged with fileobj.  Raise CompressionError
           if the module needed for comptype is not available.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("")
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Setting up the compression failed.  Do not leak a file
            # that was opened here, and mark the stream closed so that
            # __del__() does not try to flush it.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__() failed early.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and trailer
        # are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        # magic, method (deflate), flags (FNAME), mtime, xfl, os
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            # Pad the last block with nulls up to a multiple of bufsize.
            blocks, remainder = divmod(len(self.buf), self.bufsize)
            if remainder > 0:
                self.buf += NUL * (self.bufsize - remainder)
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # gzip trailer: CRC32 and size of the uncompressed data
                self.fileobj.write(struct.pack("<l", self.crc))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
           Raise ReadError if the gzip magic is missing and
           CompressionError if the method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)              # skip mtime, xfl and os

        if flag & 4:
            # FEXTRA: the field is part of the uncompressed gzip
            # header, so it has to be skipped with the raw __read();
            # read() would feed it to the decompressor and move pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: null-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: null-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: header checksum
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seeking forward means reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress blocks until enough data is available, what is
        # left over is kept in dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
534
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000535class _StreamProxy(object):
536 """Small proxy class that enables transparent compression
537 detection for the Stream interface (mode 'r|*').
538 """
539
540 def __init__(self, fileobj):
541 self.fileobj = fileobj
542 self.buf = self.fileobj.read(BLOCKSIZE)
543
544 def read(self, size):
545 self.read = self.fileobj.read
546 return self.buf
547
548 def getcomptype(self):
549 if self.buf.startswith("\037\213\010"):
550 return "gz"
551 if self.buf.startswith("BZh91"):
552 return "bz2"
553 return "tar"
554
555 def close(self):
556 self.fileobj.close()
557# class StreamProxy
558
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000559class _BZ2Proxy(object):
560 """Small proxy class that enables external file object
561 support for "r:bz2" and "w:bz2" modes. This is actually
562 a workaround for a limitation in bz2 module's BZ2File
563 class which (unlike gzip.GzipFile) has no support for
564 a file object argument.
565 """
566
567 blocksize = 16 * 1024
568
569 def __init__(self, fileobj, mode):
570 self.fileobj = fileobj
571 self.mode = mode
572 self.init()
573
574 def init(self):
575 import bz2
576 self.pos = 0
577 if self.mode == "r":
578 self.bz2obj = bz2.BZ2Decompressor()
579 self.fileobj.seek(0)
580 self.buf = ""
581 else:
582 self.bz2obj = bz2.BZ2Compressor()
583
584 def read(self, size):
585 b = [self.buf]
586 x = len(self.buf)
587 while x < size:
588 try:
589 raw = self.fileobj.read(self.blocksize)
590 data = self.bz2obj.decompress(raw)
591 b.append(data)
592 except EOFError:
593 break
594 x += len(data)
595 self.buf = "".join(b)
596
597 buf = self.buf[:size]
598 self.buf = self.buf[size:]
599 self.pos += len(buf)
600 return buf
601
602 def seek(self, pos):
603 if pos < self.pos:
604 self.init()
605 self.read(pos - self.pos)
606
607 def tell(self):
608 return self.pos
609
610 def write(self, data):
611 self.pos += len(data)
612 raw = self.bz2obj.compress(data)
613 self.fileobj.write(raw)
614
615 def close(self):
616 if self.mode == "w":
617 raw = self.bz2obj.flush()
618 self.fileobj.write(raw)
619 self.fileobj.close()
620# class _BZ2Proxy
621
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000622#------------------------
623# Extraction file object
624#------------------------
625class ExFileObject(object):
626 """File-like object for reading an archive member.
627 Is returned by TarFile.extractfile(). Support for
628 sparse files included.
629 """
630
631 def __init__(self, tarfile, tarinfo):
632 self.fileobj = tarfile.fileobj
633 self.name = tarinfo.name
634 self.mode = "r"
635 self.closed = False
636 self.offset = tarinfo.offset_data
637 self.size = tarinfo.size
638 self.pos = 0L
639 self.linebuffer = ""
640 if tarinfo.issparse():
641 self.sparse = tarinfo.sparse
642 self.read = self._readsparse
643 else:
644 self.read = self._readnormal
645
646 def __read(self, size):
647 """Overloadable read method.
648 """
649 return self.fileobj.read(size)
650
651 def readline(self, size=-1):
652 """Read a line with approx. size. If size is negative,
653 read a whole line. readline() and read() must not
654 be mixed up (!).
655 """
656 if size < 0:
657 size = sys.maxint
658
659 nl = self.linebuffer.find("\n")
660 if nl >= 0:
661 nl = min(nl, size)
662 else:
663 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000664 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000665 buf = self.read(min(size, 100))
666 if not buf:
667 break
668 self.linebuffer += buf
669 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000670 nl = self.linebuffer.find("\n")
671 if nl == -1:
672 s = self.linebuffer
673 self.linebuffer = ""
674 return s
675 buf = self.linebuffer[:nl]
676 self.linebuffer = self.linebuffer[nl + 1:]
677 while buf[-1:] == "\r":
678 buf = buf[:-1]
679 return buf + "\n"
680
681 def readlines(self):
682 """Return a list with all (following) lines.
683 """
684 result = []
685 while True:
686 line = self.readline()
687 if not line: break
688 result.append(line)
689 return result
690
691 def _readnormal(self, size=None):
692 """Read operation for regular files.
693 """
694 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +0000695 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000696 self.fileobj.seek(self.offset + self.pos)
697 bytesleft = self.size - self.pos
698 if size is None:
699 bytestoread = bytesleft
700 else:
701 bytestoread = min(size, bytesleft)
702 self.pos += bytestoread
703 return self.__read(bytestoread)
704
705 def _readsparse(self, size=None):
706 """Read operation for sparse files.
707 """
708 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +0000709 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710
711 if size is None:
712 size = self.size - self.pos
713
714 data = []
715 while size > 0:
716 buf = self._readsparsesection(size)
717 if not buf:
718 break
719 size -= len(buf)
720 data.append(buf)
721 return "".join(data)
722
723 def _readsparsesection(self, size):
724 """Read a single section of a sparse file.
725 """
726 section = self.sparse.find(self.pos)
727
728 if section is None:
729 return ""
730
731 toread = min(size, section.offset + section.size - self.pos)
732 if isinstance(section, _data):
733 realpos = section.realpos + self.pos - section.offset
734 self.pos += toread
735 self.fileobj.seek(self.offset + realpos)
736 return self.__read(toread)
737 else:
738 self.pos += toread
739 return NUL * toread
740
741 def tell(self):
742 """Return the current file position.
743 """
744 return self.pos
745
746 def seek(self, pos, whence=0):
747 """Seek to a position in the file.
748 """
749 self.linebuffer = ""
750 if whence == 0:
751 self.pos = min(max(pos, 0), self.size)
752 if whence == 1:
753 if pos < 0:
754 self.pos = max(self.pos + pos, 0)
755 else:
756 self.pos = min(self.pos + pos, self.size)
757 if whence == 2:
758 self.pos = max(min(self.size + pos, self.size), 0)
759
760 def close(self):
761 """Close the file object.
762 """
763 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000764
765 def __iter__(self):
766 """Get an iterator over the file object.
767 """
768 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +0000769 raise ValueError("I/O operation on closed file")
Martin v. Löwisdf241532005-03-03 08:17:42 +0000770 return self
771
772 def next(self):
773 """Get the next item from the file iterator.
774 """
775 result = self.readline()
776 if not result:
777 raise StopIteration
778 return result
Tim Peterseba28be2005-03-28 01:08:02 +0000779
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000780#class ExFileObject
781
782#------------------
783# Exported Classes
784#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name        # member name (dirnames must end with '/')
        self.mode = 0x1B6       # file permissions (0666 octal)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number
        self.prefix = ""        # prefix to filename or information
                                # about sparse files

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.
           Raise ValueError if buf does not have the size of a block,
           consists of null characters only or has a checksum that
           does not match.
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        # buf[257:265] holds magic and version, they are not evaluated.
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        # The prefix is kept as raw bytes (no null stripping).
        tarinfo.prefix = buf[345:500]

        # Accept the unsigned as well as the signed checksum.
        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header block as a 512 byte string.  If posix
           is true, numbers that do not fit into their field as octal
           digits raise ValueError (see itn()).
        """
        parts = [
            stn(self.name, 100),                # offset   0
            itn(self.mode & 0xFFF, 8, posix),   # offset 100 (mask 07777)
            itn(self.uid, 8, posix),            # offset 108
            itn(self.gid, 8, posix),            # offset 116
            itn(self.size, 12, posix),          # offset 124
            itn(self.mtime, 12, posix),         # offset 136
            # Checksum field, offset 148.  The placeholder has to be
            # exactly eight bytes wide: frombuf() expects the type at
            # offset 156, and the checksum is calculated with this
            # field filled with spaces.
            " " * 8,
            self.type,                          # offset 156
            stn(self.linkname, 100),            # offset 157
            stn(MAGIC, 6),                      # offset 257
            stn(VERSION, 2),                    # offset 263
            stn(self.uname, 32),                # offset 265
            stn(self.gname, 32),                # offset 297
            itn(self.devmajor, 8, posix),       # offset 329
            itn(self.devminor, 8, posix),       # offset 337
            stn(self.prefix, 155)               # offset 345
        ]

        # Null-pad the 500 bytes of fields to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf)[0]
        # Six octal digits and a null replace bytes 148..154, the
        # space at offset 155 is left in place.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        self.buf = buf
        return buf

    def isreg(self):
        """Return True if the member is a regular file."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member is a directory."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member is a symbolic link."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member is a link (inside tarfile)."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member is a character device."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member is a block device."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member is a fifo."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member is a GNU sparse file."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member is a character device, a block
           device or a fifo.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
899
900class TarFile(object):
901 """The TarFile Class provides an interface to tar archives.
902 """
903
904 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
905
906 dereference = False # If true, add content of linked file to the
907 # tar file, else the link.
908
909 ignore_zeros = False # If true, skips empty or invalid blocks and
910 # continues processing.
911
912 errorlevel = 0 # If 0, fatal errors only appear in debug
913 # messages (if debug >= 0). If > 0, errors
914 # are passed to the caller as exceptions.
915
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000916 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000917 # archives (no GNU extensions!)
918
919 fileobject = ExFileObject
920
921 def __init__(self, name=None, mode="r", fileobj=None):
922 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
923 read from an existing archive, 'a' to append data to an existing
924 file or 'w' to create a new file overwriting an existing one. `mode'
925 defaults to 'r'.
926 If `fileobj' is given, it is used for reading or writing data. If it
927 can be determined, `mode' is overridden by `fileobj's mode.
928 `fileobj' is not closed, when TarFile is closed.
929 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000930 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000931
932 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +0000933 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000934 self._mode = mode
935 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
936
937 if not fileobj:
938 fileobj = file(self.name, self.mode)
939 self._extfileobj = False
940 else:
941 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000942 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000943 if hasattr(fileobj, "mode"):
944 self.mode = fileobj.mode
945 self._extfileobj = True
946 self.fileobj = fileobj
947
948 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +0000949 self.closed = False
950 self.members = [] # list of members as TarInfo objects
951 self._loaded = False # flag if all members have been read
952 self.offset = 0L # current position in the archive file
953 self.inodes = {} # dictionary caching the inodes of
954 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000955
956 if self._mode == "r":
957 self.firstmember = None
958 self.firstmember = self.next()
959
960 if self._mode == "a":
961 # Move to the end of the archive,
962 # before the first empty block.
963 self.firstmember = None
964 while True:
965 try:
966 tarinfo = self.next()
967 except ReadError:
968 self.fileobj.seek(0)
969 break
970 if tarinfo is None:
971 self.fileobj.seek(- BLOCKSIZE, 1)
972 break
973
974 if self._mode in "aw":
975 self._loaded = True
976
977 #--------------------------------------------------------------------------
978 # Below are the classmethods which act as alternate constructors to the
979 # TarFile class. The open() method is the only one that is needed for
980 # public use; it is the "super"-constructor and is able to select an
981 # adequate "sub"-constructor for a particular compression using the mapping
982 # from OPEN_METH.
983 #
984 # This concept allows one to subclass TarFile without losing the comfort of
985 # the super-constructor. A sub-constructor is registered and made available
986 # by adding it to the mapping in OPEN_METH.
987
Guido van Rossum75b64e62005-01-16 00:16:11 +0000988 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000989 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
990 """Open a tar archive for reading, writing or appending. Return
991 an appropriate TarFile class.
992
993 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000994 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000995 'r:' open for reading exclusively uncompressed
996 'r:gz' open for reading with gzip compression
997 'r:bz2' open for reading with bzip2 compression
998 'a' or 'a:' open for appending
999 'w' or 'w:' open for writing without compression
1000 'w:gz' open for writing with gzip compression
1001 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001002
1003 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001004 'r|' open an uncompressed stream of tar blocks for reading
1005 'r|gz' open a gzip compressed stream of tar blocks
1006 'r|bz2' open a bzip2 compressed stream of tar blocks
1007 'w|' open an uncompressed stream for writing
1008 'w|gz' open a gzip compressed stream for writing
1009 'w|bz2' open a bzip2 compressed stream for writing
1010 """
1011
1012 if not name and not fileobj:
Georg Brandle4751e32006-05-18 06:11:19 +00001013 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001014
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001015 if mode in ("r", "r:*"):
1016 # Find out which *open() is appropriate for opening the file.
1017 for comptype in cls.OPEN_METH:
1018 func = getattr(cls, cls.OPEN_METH[comptype])
1019 try:
1020 return func(name, "r", fileobj)
1021 except (ReadError, CompressionError):
1022 continue
Georg Brandle4751e32006-05-18 06:11:19 +00001023 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001024
1025 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001026 filemode, comptype = mode.split(":", 1)
1027 filemode = filemode or "r"
1028 comptype = comptype or "tar"
1029
1030 # Select the *open() function according to
1031 # given compression.
1032 if comptype in cls.OPEN_METH:
1033 func = getattr(cls, cls.OPEN_METH[comptype])
1034 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001035 raise CompressionError("unknown compression type %r" % comptype)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001036 return func(name, filemode, fileobj)
1037
1038 elif "|" in mode:
1039 filemode, comptype = mode.split("|", 1)
1040 filemode = filemode or "r"
1041 comptype = comptype or "tar"
1042
1043 if filemode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001044 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001045
1046 t = cls(name, filemode,
1047 _Stream(name, filemode, comptype, fileobj, bufsize))
1048 t._extfileobj = False
1049 return t
1050
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001051 elif mode in "aw":
1052 return cls.taropen(name, mode, fileobj)
1053
Georg Brandle4751e32006-05-18 06:11:19 +00001054 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001055
Guido van Rossum75b64e62005-01-16 00:16:11 +00001056 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001057 def taropen(cls, name, mode="r", fileobj=None):
1058 """Open uncompressed tar archive name for reading or writing.
1059 """
1060 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001061 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001062 return cls(name, mode, fileobj)
1063
Guido van Rossum75b64e62005-01-16 00:16:11 +00001064 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001065 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
1066 """Open gzip compressed tar archive name for reading or writing.
1067 Appending is not allowed.
1068 """
1069 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001070 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001071
1072 try:
1073 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001074 gzip.GzipFile
1075 except (ImportError, AttributeError):
Georg Brandle4751e32006-05-18 06:11:19 +00001076 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001077
1078 pre, ext = os.path.splitext(name)
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001079 pre = os.path.basename(pre)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001080 if ext == ".tgz":
1081 ext = ".tar"
1082 if ext == ".gz":
1083 ext = ""
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001084 tarname = pre + ext
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001085
1086 if fileobj is None:
1087 fileobj = file(name, mode + "b")
1088
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001089 if mode != "r":
1090 name = tarname
1091
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001092 try:
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001093 t = cls.taropen(tarname, mode,
1094 gzip.GzipFile(name, mode, compresslevel, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001095 )
1096 except IOError:
Georg Brandle4751e32006-05-18 06:11:19 +00001097 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001098 t._extfileobj = False
1099 return t
1100
Guido van Rossum75b64e62005-01-16 00:16:11 +00001101 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001102 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
1103 """Open bzip2 compressed tar archive name for reading or writing.
1104 Appending is not allowed.
1105 """
1106 if len(mode) > 1 or mode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001107 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001108
1109 try:
1110 import bz2
1111 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +00001112 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001113
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001114 pre, ext = os.path.splitext(name)
1115 pre = os.path.basename(pre)
1116 if ext == ".tbz2":
1117 ext = ".tar"
1118 if ext == ".bz2":
1119 ext = ""
1120 tarname = pre + ext
1121
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001122 if fileobj is not None:
Georg Brandl49c8f4c2006-05-15 19:30:35 +00001123 fileobj = _BZ2Proxy(fileobj, mode)
1124 else:
1125 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001126
1127 try:
Georg Brandl49c8f4c2006-05-15 19:30:35 +00001128 t = cls.taropen(tarname, mode, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001129 except IOError:
Georg Brandle4751e32006-05-18 06:11:19 +00001130 raise ReadError("not a bzip2 file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001131 t._extfileobj = False
1132 return t
1133
# All *open() methods are registered here.
# Keys are the compression types accepted after ':' in the mode string
# given to open(); values are the names of the classmethods that open()
# looks up with getattr() on the class. open() also iterates over this
# mapping when it probes a file with mode 'r' or 'r:*'.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1140
1141 #--------------------------------------------------------------------------
1142 # The public methods which TarFile provides:
1143
1144 def close(self):
1145 """Close the TarFile. In write-mode, two finishing zero blocks are
1146 appended to the archive.
1147 """
1148 if self.closed:
1149 return
1150
1151 if self._mode in "aw":
1152 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1153 self.offset += (BLOCKSIZE * 2)
1154 # fill up the end with zero-blocks
1155 # (like option -b20 for tar does)
1156 blocks, remainder = divmod(self.offset, RECORDSIZE)
1157 if remainder > 0:
1158 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1159
1160 if not self._extfileobj:
1161 self.fileobj.close()
1162 self.closed = True
1163
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001174
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete member list requires that the whole archive has been
    # scanned once.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1184
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001190
1191 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1192 """Create a TarInfo object for either the file `name' or the file
1193 object `fileobj' (using os.fstat on its file descriptor). You can
1194 modify some of the TarInfo's attributes before you add it using
1195 addfile(). If given, `arcname' specifies an alternative name for the
1196 file in the archive.
1197 """
1198 self._check("aw")
1199
1200 # When fileobj is given, replace name by
1201 # fileobj's real name.
1202 if fileobj is not None:
1203 name = fileobj.name
1204
1205 # Building the name of the member in the archive.
1206 # Backward slashes are converted to forward slashes,
1207 # Absolute paths are turned to relative paths.
1208 if arcname is None:
1209 arcname = name
1210 arcname = normpath(arcname)
1211 drv, arcname = os.path.splitdrive(arcname)
1212 while arcname[0:1] == "/":
1213 arcname = arcname[1:]
1214
1215 # Now, fill the TarInfo object with
1216 # information specific for the file.
1217 tarinfo = TarInfo()
1218
1219 # Use os.stat or os.lstat, depending on platform
1220 # and if symlinks shall be resolved.
1221 if fileobj is None:
1222 if hasattr(os, "lstat") and not self.dereference:
1223 statres = os.lstat(name)
1224 else:
1225 statres = os.stat(name)
1226 else:
1227 statres = os.fstat(fileobj.fileno())
1228 linkname = ""
1229
1230 stmd = statres.st_mode
1231 if stat.S_ISREG(stmd):
1232 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001233 if not self.dereference and \
1234 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001235 # Is it a hardlink to an already
1236 # archived file?
1237 type = LNKTYPE
1238 linkname = self.inodes[inode]
1239 else:
1240 # The inode is added only if its valid.
1241 # For win32 it is always 0.
1242 type = REGTYPE
1243 if inode[0]:
1244 self.inodes[inode] = arcname
1245 elif stat.S_ISDIR(stmd):
1246 type = DIRTYPE
1247 if arcname[-1:] != "/":
1248 arcname += "/"
1249 elif stat.S_ISFIFO(stmd):
1250 type = FIFOTYPE
1251 elif stat.S_ISLNK(stmd):
1252 type = SYMTYPE
1253 linkname = os.readlink(name)
1254 elif stat.S_ISCHR(stmd):
1255 type = CHRTYPE
1256 elif stat.S_ISBLK(stmd):
1257 type = BLKTYPE
1258 else:
1259 return None
1260
1261 # Fill the TarInfo object with all
1262 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001263 tarinfo.name = arcname
1264 tarinfo.mode = stmd
1265 tarinfo.uid = statres.st_uid
1266 tarinfo.gid = statres.st_gid
1267 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001268 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001269 else:
1270 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001271 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001272 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001273 tarinfo.linkname = linkname
1274 if pwd:
1275 try:
1276 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1277 except KeyError:
1278 pass
1279 if grp:
1280 try:
1281 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1282 except KeyError:
1283 pass
1284
1285 if type in (CHRTYPE, BLKTYPE):
1286 if hasattr(os, "major") and hasattr(os, "minor"):
1287 tarinfo.devmajor = os.major(statres.st_rdev)
1288 tarinfo.devminor = os.minor(statres.st_rdev)
1289 return tarinfo
1290
1291 def list(self, verbose=True):
1292 """Print a table of contents to sys.stdout. If `verbose' is False, only
1293 the names of the members are printed. If it is True, an `ls -l'-like
1294 output is produced.
1295 """
1296 self._check()
1297
1298 for tarinfo in self:
1299 if verbose:
1300 print filemode(tarinfo.mode),
1301 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1302 tarinfo.gname or tarinfo.gid),
1303 if tarinfo.ischr() or tarinfo.isblk():
1304 print "%10s" % ("%d,%d" \
1305 % (tarinfo.devmajor, tarinfo.devminor)),
1306 else:
1307 print "%10d" % tarinfo.size,
1308 print "%d-%02d-%02d %02d:%02d:%02d" \
1309 % time.localtime(tarinfo.mtime)[:6],
1310
1311 print tarinfo.name,
1312
1313 if verbose:
1314 if tarinfo.issym():
1315 print "->", tarinfo.linkname,
1316 if tarinfo.islnk():
1317 print "link to", tarinfo.linkname,
1318 print
1319
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.
       The archive file itself and files of unsupported types are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if addfile() fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1371
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
       In posix mode, ValueError is raised for a member that is too large
       or whose name or linkname is too long; otherwise GNU extensions
       are written.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last '/' that still fits into the
            # prefix field.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            prefix = head[:head.rfind("/") + 1]

            name = tarinfo.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Header block first.
    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and pad the last block
    # with zeros.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks = tarinfo.size // BLOCKSIZE
        tail = tarinfo.size % BLOCKSIZE
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001436
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001437 def extractall(self, path=".", members=None):
1438 """Extract all members from the archive to the current working
1439 directory and set owner, modification time and permissions on
1440 directories afterwards. `path' specifies a different directory
1441 to extract to. `members' is optional and must be a subset of the
1442 list returned by getmembers().
1443 """
1444 directories = []
1445
1446 if members is None:
1447 members = self
1448
1449 for tarinfo in members:
1450 if tarinfo.isdir():
1451 # Extract directory with a safe mode, so that
1452 # all files below can be extracted as well.
1453 try:
1454 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1455 except EnvironmentError:
1456 pass
1457 directories.append(tarinfo)
1458 else:
1459 self.extract(tarinfo, path)
1460
1461 # Reverse sort directories.
1462 directories.sort(lambda a, b: cmp(a.name, b.name))
1463 directories.reverse()
1464
1465 # Set correct owner, mtime and filemode on directories.
1466 for tarinfo in directories:
1467 path = os.path.join(path, tarinfo.name)
1468 try:
1469 self.chown(tarinfo, path)
1470 self.utime(tarinfo, path)
1471 self.chmod(tarinfo, path)
1472 except ExtractError, e:
1473 if self.errorlevel > 1:
1474 raise
1475 else:
1476 self._dbg(1, "tarfile: %s" % e)
1477
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001478 def extract(self, member, path=""):
1479 """Extract a member from the archive to the current working directory,
1480 using its full name. Its file information is extracted as accurately
1481 as possible. `member' may be a filename or a TarInfo object. You can
1482 specify a different directory using `path'.
1483 """
1484 self._check("r")
1485
1486 if isinstance(member, TarInfo):
1487 tarinfo = member
1488 else:
1489 tarinfo = self.getmember(member)
1490
Neal Norwitza4f651a2004-07-20 22:07:44 +00001491 # Prepare the link target for makelink().
1492 if tarinfo.islnk():
1493 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1494
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001495 try:
1496 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1497 except EnvironmentError, e:
1498 if self.errorlevel > 0:
1499 raise
1500 else:
1501 if e.filename is None:
1502 self._dbg(1, "tarfile: %s" % e.strerror)
1503 else:
1504 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1505 except ExtractError, e:
1506 if self.errorlevel > 1:
1507 raise
1508 else:
1509 self._dbg(1, "tarfile: %s" % e)
1510
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Regular files, and members of unknown type (which are treated
    # as regular files), get a file object of their own.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1549
1550 def _extract_member(self, tarinfo, targetpath):
1551 """Extract the TarInfo object tarinfo to a physical
1552 file called targetpath.
1553 """
1554 # Fetch the TarInfo object for the given name
1555 # and build the destination pathname, replacing
1556 # forward slashes to platform specific separators.
1557 if targetpath[-1:] == "/":
1558 targetpath = targetpath[:-1]
1559 targetpath = os.path.normpath(targetpath)
1560
1561 # Create all upper directories.
1562 upperdirs = os.path.dirname(targetpath)
1563 if upperdirs and not os.path.exists(upperdirs):
1564 ti = TarInfo()
1565 ti.name = upperdirs
1566 ti.type = DIRTYPE
1567 ti.mode = 0777
1568 ti.mtime = tarinfo.mtime
1569 ti.uid = tarinfo.uid
1570 ti.gid = tarinfo.gid
1571 ti.uname = tarinfo.uname
1572 ti.gname = tarinfo.gname
1573 try:
1574 self._extract_member(ti, ti.name)
1575 except:
1576 pass
1577
1578 if tarinfo.islnk() or tarinfo.issym():
1579 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1580 else:
1581 self._dbg(1, tarinfo.name)
1582
1583 if tarinfo.isreg():
1584 self.makefile(tarinfo, targetpath)
1585 elif tarinfo.isdir():
1586 self.makedir(tarinfo, targetpath)
1587 elif tarinfo.isfifo():
1588 self.makefifo(tarinfo, targetpath)
1589 elif tarinfo.ischr() or tarinfo.isblk():
1590 self.makedev(tarinfo, targetpath)
1591 elif tarinfo.islnk() or tarinfo.issym():
1592 self.makelink(tarinfo, targetpath)
1593 elif tarinfo.type not in SUPPORTED_TYPES:
1594 self.makeunknown(tarinfo, targetpath)
1595 else:
1596 self.makefile(tarinfo, targetpath)
1597
1598 self.chown(tarinfo, targetpath)
1599 if not tarinfo.issym():
1600 self.chmod(tarinfo, targetpath)
1601 self.utime(tarinfo, targetpath)
1602
1603 #--------------------------------------------------------------------------
1604 # Below are the different file methods. They are called via
1605 # _extract_member() when extract() is called. They can be replaced in a
1606 # subclass to implement other functionality.
1607
1608 def makedir(self, tarinfo, targetpath):
1609 """Make a directory called targetpath.
1610 """
1611 try:
1612 os.mkdir(targetpath)
1613 except EnvironmentError, e:
1614 if e.errno != errno.EEXIST:
1615 raise
1616
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member `tarinfo'.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            # Close the target even if copying fails.
            target.close()
    finally:
        # Close the source even if the target cannot be opened.
        source.close()
1625
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is extracted like a regular file and
       a debug message is emitted.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1633
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. Raises ExtractError if the
       platform has no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001641
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath. Raises
       ExtractError if the platform lacks os.mknod() or os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        devmode = tarinfo.mode | stat.S_IFBLK
    else:
        devmode = tarinfo.mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, devmode, device)
1656
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link. IOError is raised if that fails as well.
    """
    source = tarinfo.linkname
    try:
        if not tarinfo.issym():
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(source, targetpath)
    except AttributeError:
        # Fall back to a copy: first from the archive itself, then
        # from the filesystem.
        if tarinfo.issym():
            source = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                           source))

        try:
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            source = os.path.normpath(source)
            try:
                shutil.copy2(source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001683
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing happens
       unless the process runs with effective uid 0. Raises ExtractError
       if the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then our own.
    try:
        group_id = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            group_id = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            group_id = os.getgid()

    # Resolve the user the same way.
    try:
        user_id = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            user_id = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            user_id = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, user_id, group_id)
        elif sys.platform != "os2emx":
            os.chown(targetpath, user_id, group_id)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001711
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
       Does nothing on platforms without os.chmod(). Raises ExtractError
       if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001720
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.
       Does nothing on platforms without os.utime() and for directories
       on win32. Raises ExtractError if the time cannot be changed.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if hasattr(os, 'utime') and \
       not (sys.platform == "win32" and tarinfo.isdir()):
        stamp = (tarinfo.mtime, tarinfo.mtime)
        try:
            os.utime(targetpath, stamp)
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001734
1735 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError if the very first block (offset 0) cannot be
       parsed and ignore_zeros is not set.
    """
    self._check("ra")
    # The constructor reads the first member ahead of time; hand it
    # out once before reading on.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # We shouldn't rely on this checksum, because some tar programs
            # calculate it differently and it is merely validating the
            # header block. We could just as well skip this part, which would
            # have a slight effect on performance...
            if tarinfo.chksum not in calc_chksums(buf):
                self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the unparsable block and try the one after it.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # An unparsable block at the very start means this is
                # not a readable tar archive; later on it is treated
                # as the end of the archive.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        # A header was parsed successfully; leave the read loop.
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1801
1802 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001803 # The following are methods that are called depending on the type of a
1804 # member. The entry point is proc_member() which is called with a TarInfo
1805 # object created from the header block from the current offset. The
1806 # proc_member() method can be overridden in a subclass to add custom
1807 # proc_*() methods. A proc_*() method MUST implement the following
1808 # operations:
1809 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1810 # if there is data that follows.
1811 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001812 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001813 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that matches its type
       and return whatever that method returns.

       GNU longname/longlink members go to proc_gnulong(), GNU sparse
       members to proc_sparse(), every other type to proc_builtin().
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1824
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type, which
       is then handled like a regular file.

       Sets tarinfo.offset_data to the current self.offset and, for
       regular files and types not listed in SUPPORTED_TYPES, advances
       self.offset past the member's data blocks. Returns tarinfo.
    """
    tarinfo.offset_data = self.offset
    carries_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if carries_data:
        # Step over the data blocks (size rounded up to whole blocks).
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001834
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname or longlink member.

       The data blocks following tarinfo carry the long string. The
       header after them describes the real member; it is processed via
       proc_member() and then patched with the long name (or link name)
       and with tarinfo's offset. Returns the patched member.
    """
    # Collect the data blocks, one BLOCKSIZE read per block.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longvalue = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information. It reports the offset of the
    # longname header rather than its own.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(longvalue)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(longvalue)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001863
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a _ringbuffer of _hole and _data sections from the sparse
       structs found in the header buffer (tarinfo.buf) and in any
       extension blocks read from self.fileobj, and stores it in
       tarinfo.sparse. Sets tarinfo.offset_data, advances self.offset
       past the stored data blocks, replaces tarinfo.size with the
       original file size read from the header and clears
       tarinfo.prefix. Returns tarinfo.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    # The sparse structs start at byte 386 of the header block. Each
    # struct is 24 bytes: a 12 byte offset followed by a 12 byte size.
    pos = 386
    # lastpos: end (in the original file) of the last section seen.
    # realpos: running total of data bytes, passed to _data as realpos.
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        try:
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            # A field nti() cannot convert ends the list of structs.
            break
        if offset > lastpos:
            # The gap before this data section is a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    # Byte 482 is the isextended flag, bytes 483-494 the original size.
    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extension block holds up to 21 structs (21 * 24 = 504 bytes).
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # The flag right after the structs tells whether another
        # extension block follows.
        isextended = ord(buf[504])

    # Whatever remains up to the original size is a trailing hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # tarinfo.size still holds the size from the header here; it is used
    # to skip the data blocks before being replaced by the original size.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    # Clear the prefix field so that it is not used
    # as a pathname in next().
    tarinfo.prefix = ""

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001924
1925 #--------------------------------------------------------------------------
1926 # Little helper methods:
1927
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partially filled block still occupies a whole block.
        blocks += 1
    return blocks * BLOCKSIZE
1936
1937 def _getmember(self, name, tarinfo=None):
1938 """Find an archive member by name from bottom to top.
1939 If tarinfo is given, it is used as the starting point.
1940 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001941 # Ensure that all members have been loaded.
1942 members = self.getmembers()
1943
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001944 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001945 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001946 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001947 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001948
1949 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001950 if name == members[i].name:
1951 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001952
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001953 def _load(self):
1954 """Read through the entire archive file and look for readable
1955 members.
1956 """
1957 while True:
1958 tarinfo = self.next()
1959 if tarinfo is None:
1960 break
1961 self._loaded = True
1962
1963 def _check(self, mode=None):
1964 """Check if TarFile is still open, and if the operation's mode
1965 corresponds to TarFile's mode.
1966 """
1967 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00001968 raise IOError("%s is closed" % self.__class__.__name__)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001969 if mode is not None and self._mode not in mode:
Georg Brandle4751e32006-05-18 06:11:19 +00001970 raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001971
def __iter__(self):
    """Provide an iterator object.

       Once all members have been loaded the member list is iterated
       directly; before that a TarIter reads members on demand.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1979
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

       The member consists of an extended tar header named
       "././@LongLink" whose size is the length of the NUL terminated
       name, followed by data blocks holding that name, padded with
       NULs to a multiple of BLOCKSIZE. self.offset is advanced past
       everything written.
    """
    name += NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(name)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks, NUL padding the last one if it is not full
    self.fileobj.write(name)
    padding = (-header.size) % BLOCKSIZE
    if padding > 0:
        self.fileobj.write(NUL * padding)
    self.offset += header.size + padding
2004
2005 def _dbg(self, level, msg):
2006 """Write debugging output to sys.stderr.
2007 """
2008 if level <= self.debug:
2009 print >> sys.stderr, msg
2010# class TarFile
2011
2012class TarIter:
2013 """Iterator Class.
2014
2015 for tarinfo in TarFile(...):
2016 suite...
2017 """
2018
2019 def __init__(self, tarfile):
2020 """Construct a TarIter object.
2021 """
2022 self.tarfile = tarfile
Martin v. Löwis637431b2005-03-03 23:12:42 +00002023 self.index = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002024 def __iter__(self):
2025 """Return iterator object.
2026 """
2027 return self
2028 def next(self):
2029 """Return the next item using TarFile's next() method.
2030 When all members have been read, set TarFile as _loaded.
2031 """
Martin v. Löwis637431b2005-03-03 23:12:42 +00002032 # Fix for SF #1100429: Under rare circumstances it can
2033 # happen that getmembers() is called during iteration,
2034 # which will cause TarIter to stop prematurely.
2035 if not self.tarfile._loaded:
2036 tarinfo = self.tarfile.next()
2037 if not tarinfo:
2038 self.tarfile._loaded = True
2039 raise StopIteration
2040 else:
2041 try:
2042 tarinfo = self.tarfile.members[self.index]
2043 except IndexError:
2044 raise StopIteration
2045 self.index += 1
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002046 return tarinfo
2047
2048# Helper classes for sparse file support
2049class _section:
2050 """Base class for _data and _hole.
2051 """
2052 def __init__(self, offset, size):
2053 self.offset = offset
2054 self.size = size
2055 def __contains__(self, offset):
2056 return self.offset <= offset < self.offset + self.size
2057
2058class _data(_section):
2059 """Represent a data section in a sparse file.
2060 """
2061 def __init__(self, offset, size, realpos):
2062 _section.__init__(self, offset, size)
2063 self.realpos = realpos
2064
2065class _hole(_section):
2066 """Represent a hole section in a sparse file.
2067 """
2068 pass
2069
2070class _ringbuffer(list):
2071 """Ringbuffer class which increases performance
2072 over a regular list.
2073 """
2074 def __init__(self):
2075 self.idx = 0
2076 def find(self, offset):
2077 idx = self.idx
2078 while True:
2079 item = self[idx]
2080 if offset in item:
2081 break
2082 idx += 1
2083 if idx == len(self):
2084 idx = 0
2085 if idx == self.idx:
2086 # End of File
2087 return None
2088 self.idx = idx
2089 return item
2090
2091#---------------------------------------------
2092# zipfile compatible TarFile class
2093#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen (TAR_PLAIN) or TarFile.gzopen
           (TAR_GZIPPED). Any other compression raises ValueError.
           When reading, every member gets the zipfile style attributes
           filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members reported by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        def is_regular(member):
            return member.type in REGULAR_TYPES
        return filter(is_regular, self.tarfile.getmembers())

    def printdir(self):
        """Print a listing of the archive via TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Present for ZipFile compatibility; always returns None."""
        return None

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive; compress_type is ignored."""
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member described by zinfo with the string bytes as its
           contents. zinfo's filename, file_size and date_time are copied
           to name, size and mtime before it is passed to addfile().
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2140#class TarFileCompat
2141
2142#--------------------
2143# exported functions
2144#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again through this module's
       open(); only TarError is turned into False, any other exception
       propagates to the caller.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2155
# Module-level shortcut for TarFile.open. Inside this module the name
# shadows the builtin open(); is_tarfile() above depends on that, because
# its open(name) call is resolved to this alias at call time.
open = TarFile.open