blob: 38cccae1d5ee06fd708cea681ff4c8d2092d5501 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL        = "\0"               # the null character
BLOCKSIZE  = 512                # length of processing blocks (one header)
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

LENGTH_NAME    = 100            # maximum length of a filename
LENGTH_LINK    = 100            # maximum length of a linkname
LENGTH_PREFIX  = 155            # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a member header.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits; filemode() maps them to the first character
# of its result.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (shown as "t"/"T" by filemode())

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000135
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

       `s' is cut down to at most length - 1 characters, padded with
       NULs up to that width and then terminated with one more NUL.
    """
    text = s[:length - 1]
    # A negative repeat count yields the empty string, so an
    # over-long `s' simply gets no padding.
    fill = NUL * (length - len(s) - 1)
    return text + fill + NUL
Georg Brandl38c6a222006-05-10 16:26:03 +0000140
141def nti(s):
142 """Convert a number field to a python number.
143 """
144 # There are two possible encodings for a number field, see
145 # itn() below.
146 if s[0] != chr(0200):
147 n = int(s.rstrip(NUL) or "0", 8)
148 else:
149 n = 0L
150 for i in xrange(len(s) - 1):
151 n <<= 8
152 n += ord(s[i + 1])
153 return n
154
155def itn(n, digits=8, posix=False):
156 """Convert a python number to a number field.
157 """
158 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
159 # octal digits followed by a null-byte, this allows values up to
160 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
161 # that if necessary. A leading 0200 byte indicates this particular
162 # encoding, the following digits-1 bytes are a big-endian
163 # representation. This allows values up to (256**(digits-1))-1.
164 if 0 <= n < 8 ** (digits - 1):
165 s = "%0*o" % (digits - 1, n) + NUL
166 else:
167 if posix:
Georg Brandle4751e32006-05-18 06:11:19 +0000168 raise ValueError("overflow in number field")
Georg Brandl38c6a222006-05-10 16:26:03 +0000169
170 if n < 0:
171 # XXX We mimic GNU tar's behaviour with negative numbers,
172 # this could raise OverflowError.
173 n = struct.unpack("L", struct.pack("l", n))[0]
174
175 s = ""
176 for i in xrange(digits - 1):
177 s = chr(n & 0377) + s
178 n >>= 8
179 s = chr(0200) + s
180 return s
181
def calc_chksums(buf):
    """Calculate the checksum for a member's header.

       All characters of the 512 byte header are summed up, except
       for the chksum field (bytes 148-155) which is treated as if
       it was filled with spaces (8 * 32 == 256). According to the
       GNU tar sources, some tars (Sun and NeXT) calculate chksum
       with signed char, which gives a different result if there are
       chars in the buffer with the high bit set. Therefore a tuple
       (unsigned_chksum, signed_chksum) is returned.
    """
    before = buf[:148]
    after = buf[156:512]

    unsigned_chksum = 256 + sum(struct.unpack("148B", before) +
                                struct.unpack("356B", after))
    signed_chksum = 256 + sum(struct.unpack("148b", before) +
                              struct.unpack("356b", after))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000194
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    # Whole buffers first ...
    done = 0
    while done < full_chunks:
        data = src.read(BUFSIZE)
        if len(data) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(data)
        done += 1

    # ... then whatever is left over.
    if tail:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
219
# Lookup table for filemode(): one inner tuple per character of the
# result string. Each inner tuple lists (bits, char) candidates that
# are tried in order; the first one whose bits are all set in the mode
# supplies the character, so combined masks must come before the
# single bits they contain.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000246
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" is used when none of the candidates for this position
        # has all of its bits set in mode.
        symbol = "-"
        for mask, flag_char in candidates:
            if mode & mask == mask:
                symbol = flag_char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000261
# normpath() normalizes a pathname and always uses "/" as the
# separator in its result, whatever os.sep is on this platform.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
266
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
282
283#---------------------------
284# internal stream interface
285#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file `name'. `mode' must be "r" (read-only) or
           "w" (create or truncate for writing); any other value
           raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # Pass the permission bits explicitly (rw-rw-rw-, still subject
        # to the umask). Without them os.open() falls back to its
        # default of 0777 and new archives end up executable.
        perm = (stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IWGRP |
                stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perm)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Read and return up to `size' bytes."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string `s'."""
        os.write(self.fd, s)
309
310class _Stream:
311 """Class that serves as an adapter between TarFile and
312 a stream-like object. The stream-like object only
313 needs to have a read() or write() method and is accessed
314 blockwise. Use of gzip or bzip2 compression is possible.
315 A stream-like object could be for example: sys.stdin,
316 sys.stdout, a socket, a tape device etc.
317
318 _Stream is intended to be used only internally.
319 """
320
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000321 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000322 """Construct a _Stream object.
323 """
324 self._extfileobj = True
325 if fileobj is None:
326 fileobj = _LowLevelFile(name, mode)
327 self._extfileobj = False
328
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000329 if comptype == '*':
330 # Enable transparent compression detection for the
331 # stream interface
332 fileobj = _StreamProxy(fileobj)
333 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000334
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000335 self.name = name or ""
336 self.mode = mode
337 self.comptype = comptype
338 self.fileobj = fileobj
339 self.bufsize = bufsize
340 self.buf = ""
341 self.pos = 0L
342 self.closed = False
343
344 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000345 try:
346 import zlib
347 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000348 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000349 self.zlib = zlib
350 self.crc = zlib.crc32("")
351 if mode == "r":
352 self._init_read_gz()
353 else:
354 self._init_write_gz()
355
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000356 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000357 try:
358 import bz2
359 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000360 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000361 if mode == "r":
362 self.dbuf = ""
363 self.cmp = bz2.BZ2Decompressor()
364 else:
365 self.cmp = bz2.BZ2Compressor()
366
367 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000368 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000369 self.close()
370
371 def _init_write_gz(self):
372 """Initialize for writing with gzip compression.
373 """
374 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
375 -self.zlib.MAX_WBITS,
376 self.zlib.DEF_MEM_LEVEL,
377 0)
378 timestamp = struct.pack("<L", long(time.time()))
379 self.__write("\037\213\010\010%s\002\377" % timestamp)
380 if self.name.endswith(".gz"):
381 self.name = self.name[:-3]
382 self.__write(self.name + NUL)
383
384 def write(self, s):
385 """Write string s to the stream.
386 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000387 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000388 self.crc = self.zlib.crc32(s, self.crc)
389 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000391 s = self.cmp.compress(s)
392 self.__write(s)
393
394 def __write(self, s):
395 """Write string s to the stream if a whole new block
396 is ready to be written.
397 """
398 self.buf += s
399 while len(self.buf) > self.bufsize:
400 self.fileobj.write(self.buf[:self.bufsize])
401 self.buf = self.buf[self.bufsize:]
402
403 def close(self):
404 """Close the _Stream object. No operation should be
405 done on it afterwards.
406 """
407 if self.closed:
408 return
409
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000411 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000412
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000413 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414 self.fileobj.write(self.buf)
415 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000416 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000417 # The native zlib crc is an unsigned 32-bit integer, but
418 # the Python wrapper implicitly casts that to a signed C
419 # long. So, on a 32-bit box self.crc may "look negative",
420 # while the same crc on a 64-bit box may "look positive".
421 # To avoid irksome warnings from the `struct` module, force
422 # it to look positive on all boxes.
423 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000424 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000425
426 if not self._extfileobj:
427 self.fileobj.close()
428
429 self.closed = True
430
431 def _init_read_gz(self):
432 """Initialize for reading a gzip compressed fileobj.
433 """
434 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
435 self.dbuf = ""
436
437 # taken from gzip.GzipFile with some alterations
438 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000439 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000440 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000441 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000442
443 flag = ord(self.__read(1))
444 self.__read(6)
445
446 if flag & 4:
447 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
448 self.read(xlen)
449 if flag & 8:
450 while True:
451 s = self.__read(1)
452 if not s or s == NUL:
453 break
454 if flag & 16:
455 while True:
456 s = self.__read(1)
457 if not s or s == NUL:
458 break
459 if flag & 2:
460 self.__read(2)
461
462 def tell(self):
463 """Return the stream's file pointer position.
464 """
465 return self.pos
466
467 def seek(self, pos=0):
468 """Set the stream's file pointer to pos. Negative seeking
469 is forbidden.
470 """
471 if pos - self.pos >= 0:
472 blocks, remainder = divmod(pos - self.pos, self.bufsize)
473 for i in xrange(blocks):
474 self.read(self.bufsize)
475 self.read(remainder)
476 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000477 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000478 return self.pos
479
480 def read(self, size=None):
481 """Return the next size number of bytes from the stream.
482 If size is not defined, return all bytes of the stream
483 up to EOF.
484 """
485 if size is None:
486 t = []
487 while True:
488 buf = self._read(self.bufsize)
489 if not buf:
490 break
491 t.append(buf)
492 buf = "".join(t)
493 else:
494 buf = self._read(size)
495 self.pos += len(buf)
496 return buf
497
498 def _read(self, size):
499 """Return size bytes from the stream.
500 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000501 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000502 return self.__read(size)
503
504 c = len(self.dbuf)
505 t = [self.dbuf]
506 while c < size:
507 buf = self.__read(self.bufsize)
508 if not buf:
509 break
510 buf = self.cmp.decompress(buf)
511 t.append(buf)
512 c += len(buf)
513 t = "".join(t)
514 self.dbuf = t[size:]
515 return t[:size]
516
517 def __read(self, size):
518 """Return size bytes from stream. If internal buffer is empty,
519 read another block from the stream.
520 """
521 c = len(self.buf)
522 t = [self.buf]
523 while c < size:
524 buf = self.fileobj.read(self.bufsize)
525 if not buf:
526 break
527 t.append(buf)
528 c += len(buf)
529 t = "".join(t)
530 self.buf = t[size:]
531 return t[:size]
532# class _Stream
533
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        # Read the first block ahead so that getcomptype() can
        # inspect it.
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead, regardless of
           `size'. All subsequent calls go straight to the wrapped
           file object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes
           found at the start of the stream.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit for the block
        # size (1-9) and the block magic "1AY&SY". Skip the digit so
        # that every compression level is recognized, not only 9.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
557
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024       # size of the raw reads from fileobj

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)start at the beginning of the stream. In "r" mode the
           file object is rewound and a fresh decompressor is created.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to `size' bytes of decompressed data; fewer are
           returned when the end of the data is reached.
        """
        b = [self.buf]
        x = len(self.buf)
        while x < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # The file is exhausted. Without this check a
                # truncated stream would keep the loop spinning
                # forever, because no further data ever arrives.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # end of the bzip2 stream was already reached
                break
            b.append(data)
            x += len(data)
        self.buf = "".join(b)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to position `pos' of the decompressed data. Going
           backwards restarts decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        return self.pos

    def write(self, data):
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush pending compressed data in "w" mode, then close the
           wrapped file object.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
620
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000621#------------------------
622# Extraction file object
623#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member `tarinfo' that takes its
           data from `tarfile's file object.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the member's data
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # read-ahead of readline()
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Carriage returns directly in front of the newline are
           removed from the returned line.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            if nl >= size:
                # The buffered line does not fit into size: return
                # an incomplete line and keep the rest buffered,
                # instead of dropping a character and faking a
                # newline.
                buf = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                return buf
        else:
            size -= len(self.linebuffer)
            while (nl < 0 and size > 0):
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                # No newline before EOF or before size ran out.
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # real data, stored at section.realpos in the archive
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # any other section is returned as NULs
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. The resulting position
           is clamped to the range 0 ... size. Pending readline()
           data is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
780
781#------------------
782# Exported Classes
783#------------------
784class TarInfo(object):
785 """Informational class which holds the details about an
786 archive member given by a tar header block.
787 TarInfo objects are returned by TarFile.getmember(),
788 TarFile.getmembers() and TarFile.gettarinfo() and are
789 usually created internally.
790 """
791
792 def __init__(self, name=""):
793 """Construct a TarInfo object. name is the optional name
794 of the member.
795 """
796
Georg Brandl38c6a222006-05-10 16:26:03 +0000797 self.name = name # member name (dirnames must end with '/')
798 self.mode = 0666 # file permissions
799 self.uid = 0 # user id
800 self.gid = 0 # group id
801 self.size = 0 # file size
802 self.mtime = 0 # modification time
803 self.chksum = 0 # header checksum
804 self.type = REGTYPE # member type
805 self.linkname = "" # link name
806 self.uname = "user" # user name
807 self.gname = "group" # group name
808 self.devmajor = 0 # device major number
809 self.devminor = 0 # device minor number
810 self.prefix = "" # prefix to filename or information
811 # about sparse files
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000812
Georg Brandl38c6a222006-05-10 16:26:03 +0000813 self.offset = 0 # the tar header starts here
814 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000815
816 def __repr__(self):
817 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
818
Guido van Rossum75b64e62005-01-16 00:16:11 +0000819 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000820 def frombuf(cls, buf):
821 """Construct a TarInfo object from a 512 byte string buffer.
822 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000823 if len(buf) != BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000824 raise ValueError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000825 if buf.count(NUL) == BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000826 raise ValueError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000827
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000828 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000829 tarinfo.buf = buf
Georg Brandle8953182006-05-27 14:02:03 +0000830 tarinfo.name = buf[0:100].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000831 tarinfo.mode = nti(buf[100:108])
832 tarinfo.uid = nti(buf[108:116])
833 tarinfo.gid = nti(buf[116:124])
834 tarinfo.size = nti(buf[124:136])
835 tarinfo.mtime = nti(buf[136:148])
836 tarinfo.chksum = nti(buf[148:156])
837 tarinfo.type = buf[156:157]
Georg Brandle8953182006-05-27 14:02:03 +0000838 tarinfo.linkname = buf[157:257].rstrip(NUL)
839 tarinfo.uname = buf[265:297].rstrip(NUL)
840 tarinfo.gname = buf[297:329].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000841 tarinfo.devmajor = nti(buf[329:337])
842 tarinfo.devminor = nti(buf[337:345])
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000843 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000844
Georg Brandl38c6a222006-05-10 16:26:03 +0000845 if tarinfo.chksum not in calc_chksums(buf):
Georg Brandle4751e32006-05-18 06:11:19 +0000846 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000847 return tarinfo
848
Georg Brandl38c6a222006-05-10 16:26:03 +0000849 def tobuf(self, posix=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000850 """Return a tar header block as a 512 byte string.
851 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000852 parts = [
853 stn(self.name, 100),
854 itn(self.mode & 07777, 8, posix),
855 itn(self.uid, 8, posix),
856 itn(self.gid, 8, posix),
857 itn(self.size, 12, posix),
858 itn(self.mtime, 12, posix),
859 " ", # checksum field
860 self.type,
861 stn(self.linkname, 100),
862 stn(MAGIC, 6),
863 stn(VERSION, 2),
864 stn(self.uname, 32),
865 stn(self.gname, 32),
866 itn(self.devmajor, 8, posix),
867 itn(self.devminor, 8, posix),
868 stn(self.prefix, 155)
869 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000870
Georg Brandl38c6a222006-05-10 16:26:03 +0000871 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
872 chksum = calc_chksums(buf)[0]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000874 self.buf = buf
875 return buf
876
877 def isreg(self):
878 return self.type in REGULAR_TYPES
879 def isfile(self):
880 return self.isreg()
881 def isdir(self):
882 return self.type == DIRTYPE
883 def issym(self):
884 return self.type == SYMTYPE
885 def islnk(self):
886 return self.type == LNKTYPE
887 def ischr(self):
888 return self.type == CHRTYPE
889 def isblk(self):
890 return self.type == BLKTYPE
891 def isfifo(self):
892 return self.type == FIFOTYPE
893 def issparse(self):
894 return self.type == GNUTYPE_SPARSE
895 def isdev(self):
896 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
897# class TarInfo
898
899class TarFile(object):
900 """The TarFile Class provides an interface to tar archives.
901 """
902
903 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
904
905 dereference = False # If true, add content of linked file to the
906 # tar file, else the link.
907
908 ignore_zeros = False # If true, skips empty or invalid blocks and
909 # continues processing.
910
911 errorlevel = 0 # If 0, fatal errors only appear in debug
912 # messages (if debug >= 0). If > 0, errors
913 # are passed to the caller as exceptions.
914
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000915 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000916 # archives (no GNU extensions!)
917
918 fileobject = ExFileObject
919
920 def __init__(self, name=None, mode="r", fileobj=None):
921 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
922 read from an existing archive, 'a' to append data to an existing
923 file or 'w' to create a new file overwriting an existing one. `mode'
924 defaults to 'r'.
925 If `fileobj' is given, it is used for reading or writing data. If it
926 can be determined, `mode' is overridden by `fileobj's mode.
927 `fileobj' is not closed, when TarFile is closed.
928 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000929 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000930
931 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +0000932 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000933 self._mode = mode
934 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
935
936 if not fileobj:
937 fileobj = file(self.name, self.mode)
938 self._extfileobj = False
939 else:
940 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000941 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000942 if hasattr(fileobj, "mode"):
943 self.mode = fileobj.mode
944 self._extfileobj = True
945 self.fileobj = fileobj
946
947 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +0000948 self.closed = False
949 self.members = [] # list of members as TarInfo objects
950 self._loaded = False # flag if all members have been read
951 self.offset = 0L # current position in the archive file
952 self.inodes = {} # dictionary caching the inodes of
953 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000954
955 if self._mode == "r":
956 self.firstmember = None
957 self.firstmember = self.next()
958
959 if self._mode == "a":
960 # Move to the end of the archive,
961 # before the first empty block.
962 self.firstmember = None
963 while True:
964 try:
965 tarinfo = self.next()
966 except ReadError:
967 self.fileobj.seek(0)
968 break
969 if tarinfo is None:
970 self.fileobj.seek(- BLOCKSIZE, 1)
971 break
972
973 if self._mode in "aw":
974 self._loaded = True
975
976 #--------------------------------------------------------------------------
977 # Below are the classmethods which act as alternate constructors to the
978 # TarFile class. The open() method is the only one that is needed for
979 # public use; it is the "super"-constructor and is able to select an
980 # adequate "sub"-constructor for a particular compression using the mapping
981 # from OPEN_METH.
982 #
983 # This concept allows one to subclass TarFile without losing the comfort of
984 # the super-constructor. A sub-constructor is registered and made available
985 # by adding it to the mapping in OPEN_METH.
986
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered method is able to
       open the archive.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # A failed attempt may already have consumed data from a file
        # object passed in by the caller, so remember its position and
        # rewind before the next method is tried.
        saved_pos = None
        if fileobj is not None:
            try:
                saved_pos = fileobj.tell()
            except (AttributeError, EnvironmentError):
                # No usable tell(): carry on without rewinding, which is
                # what happened before.
                saved_pos = None

        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if saved_pos is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: a substring test would also accept "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream was created here, so the TarFile has to close it.
        t._extfileobj = False
        return t

    # Exact comparison: a substring test would also accept "" and "aw".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001054
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. Returns cls(name, mode, fileobj).
    """
    # Exact comparison: "" is a substring of "raw" and must not pass.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1062
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError for an
       invalid `mode' or if there is nothing to open, CompressionError if
       the gzip module cannot be used and ReadError if taropen() fails
       with an IOError. A file opened here is closed again if opening
       the archive fails.
    """
    # Exact comparison: "" is a substring of "rw" and must not pass.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    if name is None and fileobj is None:
        raise ValueError("nothing to open")

    # Derive the name of the tar archive inside the gzip container:
    # "x.tgz" -> "x.tar", "x.tar.gz" -> "x.tar". Without a name (only a
    # file object was given) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        # When writing, the tar name is what is handed to GzipFile as
        # its filename.
        name = tarname

    try:
        try:
            t = cls.taropen(tarname, mode,
                            gzip.GzipFile(name, mode, compresslevel, fileobj))
        except IOError:
            raise ReadError("not a gzip file")
    except:
        # Only close what was opened here; the exception is re-raised.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1099
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError for an
       invalid `mode' or if there is nothing to open, CompressionError if
       the bz2 module is missing and ReadError if taropen() fails with an
       IOError. A BZ2File opened here is closed again if opening the
       archive fails.
    """
    # Exact comparison: "" is a substring of "rw" and must not pass.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if name is None and fileobj is None:
        raise ValueError("nothing to open")

    # Derive the name of the tar archive inside the bzip2 container:
    # "x.tbz2" -> "x.tar", "x.tar.bz2" -> "x.tar". Without a name (only a
    # file object was given) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        try:
            t = cls.taropen(tarname, mode, fileobj)
        except IOError:
            raise ReadError("not a bzip2 file")
    except:
        # Only close what was opened here; the exception is re-raised.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1132
# All *open() methods are registered here.
# The keys are the compression names accepted after the ':' in the mode
# string of open(), the values are the names of the classmethods that
# open such an archive. open() looks a single entry up for an explicit
# compression and iterates over the whole mapping for 'r' / 'r:*'.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1139
1140 #--------------------------------------------------------------------------
1141 # The public methods which TarFile provides:
1142
def close(self):
    """Close the TarFile. When the archive was opened for writing or
       appending, two zero blocks are written as end marker and the
       archive is padded with zeros up to a multiple of RECORDSIZE.
       Closing an already closed TarFile does nothing.
    """
    if self.closed:
        return

    if self._mode in "aw":
        end_marker = NUL * (BLOCKSIZE * 2)
        self.fileobj.write(end_marker)
        self.offset += (BLOCKSIZE * 2)

        # Pad the last record with zeros
        # (like option -b20 for tar does).
        overhang = self.offset % RECORDSIZE
        if overhang > 0:
            self.fileobj.write(NUL * (RECORDSIZE - overhang))

    # A file object supplied by the caller is left open.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1162
def getmember(self, name):
    """Return the TarInfo object that _getmember() finds for member
       `name'. Raise KeyError if `name' is not found in the archive.
       If a member occurs more than once in the archive, its last
       occurrence is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001173
def getmembers(self):
    """Return the list of TarInfo objects held in self.members, in
       archive order. If the archive has not been read completely yet,
       the remaining members are loaded first.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list requires scanning the whole archive once.
    self._load()
    return self.members
1183
def getnames(self):
    """Return a list with the name of every archive member, in the
       order in which getmembers() returns the members.
    """
    members = self.getmembers()
    return [member.name for member in members]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001189
1190 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1191 """Create a TarInfo object for either the file `name' or the file
1192 object `fileobj' (using os.fstat on its file descriptor). You can
1193 modify some of the TarInfo's attributes before you add it using
1194 addfile(). If given, `arcname' specifies an alternative name for the
1195 file in the archive.
1196 """
1197 self._check("aw")
1198
1199 # When fileobj is given, replace name by
1200 # fileobj's real name.
1201 if fileobj is not None:
1202 name = fileobj.name
1203
1204 # Building the name of the member in the archive.
1205 # Backward slashes are converted to forward slashes,
1206 # Absolute paths are turned to relative paths.
1207 if arcname is None:
1208 arcname = name
1209 arcname = normpath(arcname)
1210 drv, arcname = os.path.splitdrive(arcname)
1211 while arcname[0:1] == "/":
1212 arcname = arcname[1:]
1213
1214 # Now, fill the TarInfo object with
1215 # information specific for the file.
1216 tarinfo = TarInfo()
1217
1218 # Use os.stat or os.lstat, depending on platform
1219 # and if symlinks shall be resolved.
1220 if fileobj is None:
1221 if hasattr(os, "lstat") and not self.dereference:
1222 statres = os.lstat(name)
1223 else:
1224 statres = os.stat(name)
1225 else:
1226 statres = os.fstat(fileobj.fileno())
1227 linkname = ""
1228
1229 stmd = statres.st_mode
1230 if stat.S_ISREG(stmd):
1231 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001232 if not self.dereference and \
1233 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001234 # Is it a hardlink to an already
1235 # archived file?
1236 type = LNKTYPE
1237 linkname = self.inodes[inode]
1238 else:
1239 # The inode is added only if its valid.
1240 # For win32 it is always 0.
1241 type = REGTYPE
1242 if inode[0]:
1243 self.inodes[inode] = arcname
1244 elif stat.S_ISDIR(stmd):
1245 type = DIRTYPE
1246 if arcname[-1:] != "/":
1247 arcname += "/"
1248 elif stat.S_ISFIFO(stmd):
1249 type = FIFOTYPE
1250 elif stat.S_ISLNK(stmd):
1251 type = SYMTYPE
1252 linkname = os.readlink(name)
1253 elif stat.S_ISCHR(stmd):
1254 type = CHRTYPE
1255 elif stat.S_ISBLK(stmd):
1256 type = BLKTYPE
1257 else:
1258 return None
1259
1260 # Fill the TarInfo object with all
1261 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001262 tarinfo.name = arcname
1263 tarinfo.mode = stmd
1264 tarinfo.uid = statres.st_uid
1265 tarinfo.gid = statres.st_gid
1266 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001267 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001268 else:
1269 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001270 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001271 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001272 tarinfo.linkname = linkname
1273 if pwd:
1274 try:
1275 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1276 except KeyError:
1277 pass
1278 if grp:
1279 try:
1280 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1281 except KeyError:
1282 pass
1283
1284 if type in (CHRTYPE, BLKTYPE):
1285 if hasattr(os, "major") and hasattr(os, "minor"):
1286 tarinfo.devmajor = os.major(statres.st_rdev)
1287 tarinfo.devminor = os.minor(statres.st_rdev)
1288 return tarinfo
1289
1290 def list(self, verbose=True):
1291 """Print a table of contents to sys.stdout. If `verbose' is False, only
1292 the names of the members are printed. If it is True, an `ls -l'-like
1293 output is produced.
1294 """
1295 self._check()
1296
1297 for tarinfo in self:
1298 if verbose:
1299 print filemode(tarinfo.mode),
1300 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1301 tarinfo.gname or tarinfo.gid),
1302 if tarinfo.ischr() or tarinfo.isblk():
1303 print "%10s" % ("%d,%d" \
1304 % (tarinfo.devmajor, tarinfo.devminor)),
1305 else:
1306 print "%10d" % tarinfo.size,
1307 print "%d-%02d-%02d %02d:%02d:%02d" \
1308 % time.localtime(tarinfo.mtime)[:6],
1309
1310 print tarinfo.name,
1311
1312 if verbose:
1313 if tarinfo.issym():
1314 print "->", tarinfo.linkname,
1315 if tarinfo.islnk():
1316 print "link to", tarinfo.linkname,
1317 print
1318
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive itself and files of an unsupported type are skipped
       with a debug message. The file that is read for a regular member
       is closed again even if writing it to the archive fails.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
            and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the handle even if addfile() raised.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1370
def addfile(self, tarinfo, fileobj=None):
    """Write the header for the TarInfo object `tarinfo' to the archive
       and, if `fileobj' is given, copy tarinfo.size bytes from it as the
       member's data. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
       In posix mode ValueError is raised for a file, name or linkname
       that does not fit into the header; otherwise GNU extensions are
       used.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    # Oversized member.
    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    # Oversized link target.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    # Oversized member name.
    if len(tarinfo.name) > LENGTH_NAME:
        if not self.posix:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
        else:
            # Split the name at the last '/' found within the first
            # LENGTH_PREFIX + 1 characters: what precedes the slash
            # becomes the prefix, the rest stays the name.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            split_at = head.rfind("/") + 1

            name = tarinfo.name[split_at:]
            prefix = head[:split_at][:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix

    header = tarinfo.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += BLOCKSIZE

    # If there's data to follow, append it and
    # fill the last block up with zeros.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        block_count, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            block_count += 1
        self.offset += block_count * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001435
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while the attributes of a directory are
       set is only passed on if errorlevel is greater than 1, otherwise
       it ends up in a debug message.
    """
    directories = []

    if members is None:
        members = self

    # rwx for user, group and others, i.e. mode 0777.
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                # Best effort, e.g. the directory may exist already.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a name of its own for the directory: rebinding `path' here
        # made every following directory resolve relative to the
        # previous one instead of the extraction root.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1476
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001477 def extract(self, member, path=""):
1478 """Extract a member from the archive to the current working directory,
1479 using its full name. Its file information is extracted as accurately
1480 as possible. `member' may be a filename or a TarInfo object. You can
1481 specify a different directory using `path'.
1482 """
1483 self._check("r")
1484
1485 if isinstance(member, TarInfo):
1486 tarinfo = member
1487 else:
1488 tarinfo = self.getmember(member)
1489
Neal Norwitza4f651a2004-07-20 22:07:44 +00001490 # Prepare the link target for makelink().
1491 if tarinfo.islnk():
1492 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1493
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001494 try:
1495 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1496 except EnvironmentError, e:
1497 if self.errorlevel > 0:
1498 raise
1499 else:
1500 if e.filename is None:
1501 self._dbg(1, "tarfile: %s" % e.strerror)
1502 else:
1503 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1504 except ExtractError, e:
1505 if self.errorlevel > 1:
1506 raise
1507 else:
1508 self._dbg(1, "tarfile: %s" % e)
1509
def extractfile(self, member):
    """Return a file-like object for an archive member. `member' is
       either a TarInfo object or a filename. For a regular file (or a
       member of unknown type) the object gives access to its data, for
       a (sym)link it is built from the link's target. For every other
       kind of member None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of an unknown type, which
    # are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices etc. carry no data.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1548
1549 def _extract_member(self, tarinfo, targetpath):
1550 """Extract the TarInfo object tarinfo to a physical
1551 file called targetpath.
1552 """
1553 # Fetch the TarInfo object for the given name
1554 # and build the destination pathname, replacing
1555 # forward slashes to platform specific separators.
1556 if targetpath[-1:] == "/":
1557 targetpath = targetpath[:-1]
1558 targetpath = os.path.normpath(targetpath)
1559
1560 # Create all upper directories.
1561 upperdirs = os.path.dirname(targetpath)
1562 if upperdirs and not os.path.exists(upperdirs):
1563 ti = TarInfo()
1564 ti.name = upperdirs
1565 ti.type = DIRTYPE
1566 ti.mode = 0777
1567 ti.mtime = tarinfo.mtime
1568 ti.uid = tarinfo.uid
1569 ti.gid = tarinfo.gid
1570 ti.uname = tarinfo.uname
1571 ti.gname = tarinfo.gname
1572 try:
1573 self._extract_member(ti, ti.name)
1574 except:
1575 pass
1576
1577 if tarinfo.islnk() or tarinfo.issym():
1578 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1579 else:
1580 self._dbg(1, tarinfo.name)
1581
1582 if tarinfo.isreg():
1583 self.makefile(tarinfo, targetpath)
1584 elif tarinfo.isdir():
1585 self.makedir(tarinfo, targetpath)
1586 elif tarinfo.isfifo():
1587 self.makefifo(tarinfo, targetpath)
1588 elif tarinfo.ischr() or tarinfo.isblk():
1589 self.makedev(tarinfo, targetpath)
1590 elif tarinfo.islnk() or tarinfo.issym():
1591 self.makelink(tarinfo, targetpath)
1592 elif tarinfo.type not in SUPPORTED_TYPES:
1593 self.makeunknown(tarinfo, targetpath)
1594 else:
1595 self.makefile(tarinfo, targetpath)
1596
1597 self.chown(tarinfo, targetpath)
1598 if not tarinfo.issym():
1599 self.chmod(tarinfo, targetpath)
1600 self.utime(tarinfo, targetpath)
1601
1602 #--------------------------------------------------------------------------
1603 # Below are the different file methods. They are called via
1604 # _extract_member() when extract() is called. They can be replaced in a
1605 # subclass to implement other functionality.
1606
1607 def makedir(self, tarinfo, targetpath):
1608 """Make a directory called targetpath.
1609 """
1610 try:
1611 os.mkdir(targetpath)
1612 except EnvironmentError, e:
1613 if e.errno != errno.EEXIST:
1614 raise
1615
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The data comes from the file object that extractfile() returns
       for `tarinfo'. Both that object and the target file are closed
       again, also if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1624
def makeunknown(self, tarinfo, targetpath):
    """Extract a member whose type is unknown to `targetpath' as if it
       were a regular file and report that in a debug message.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1632
def makefifo(self, tarinfo, targetpath):
    """Create the fifo `targetpath'. Raise ExtractError if the os
       module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001640
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath' from the
       mode and the device numbers stored in `tarinfo'. Raise
       ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Add the device type bit to the stored mode.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1655
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'. If that raises
       AttributeError (platform limitation), the member the link refers
       to is extracted in its place, and failing that, the referenced
       file is copied. IOError is raised if the copy fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if not tarinfo.issym():
            # _link_target has been prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(linkpath, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # The target of a symlink is relative to the
            # directory that contains the link.
            member_dir = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(member_dir, linkpath))

        try:
            referenced = self.getmember(linkpath)
            self._extract_member(referenced, targetpath)
        except (EnvironmentError, KeyError):
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001682
def chown(self, tarinfo, targetpath):
    """Give `targetpath' the owner and group recorded in `tarinfo'.
       Nothing is done unless the pwd module is available and the
       effective user id is 0. Raise ExtractError if changing the
       owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Group: by name first, then by id, finally our own group.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # User: by name first, then by id, finally our own user.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001710
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in `tarinfo' to `targetpath'.
       Nothing is done if the os module has no chmod(). Raise
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001719
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to the mtime
       stored in `tarinfo'. Nothing is done if the os module has no
       utime() or for directories on win32. Raise ExtractError if the
       time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    on_windows = (sys.platform == "win32")
    if on_windows and tarinfo.isdir():
        return
    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001733
1734 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError if the very first block (offset 0) cannot be
       parsed and ignore_zeros is not set. Every member returned here is
       also appended to self.members.
    """
    self._check("ra")
    # A member that has been read ahead is handed out first, once.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    # The seek happens only once; when a block is skipped below, the
    # following read simply continues where the previous one ended.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the block and try the next one.
                # NOTE(review): if the ValueError came from proc_member(),
                # self.offset has already been advanced above and is
                # advanced a second time here - confirm this is intended.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # A bad block at offset 0 is reported as an error, a bad
                # block later on is treated like the end of the archive.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
                                         tarinfo.name))

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1794
1795 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001796 # The following are methods that are called depending on the type of a
1797 # member. The entry point is proc_member() which is called with a TarInfo
1798 # object created from the header block from the current offset. The
1799 # proc_member() method can be overridden in a subclass to add custom
1800 # proc_*() methods. A proc_*() method MUST implement the following
1801 # operations:
1802 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1803 # if there is data that follows.
1804 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001805 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001806 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method responsible for its
       type and return whatever that method returns.
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    if kind == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    # Every other type is handled as a builtin member.
    return self.proc_builtin(tarinfo)
1817
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type. A member of an unknown
       type is treated like a regular file.
    """
    tarinfo.offset_data = self.offset
    if not tarinfo.isreg() and tarinfo.type in SUPPORTED_TYPES:
        # Supported non-regular member: self.offset is left as it is.
        return tarinfo

    # Move self.offset past the data blocks of this member.
    self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001827
def proc_gnulong(self, tarinfo):
    """Read the data blocks of a GNU longname or longlink member and
       apply the string they contain to the member that follows.
       That following member is what gets returned.
    """
    # Collect the data blocks belonging to the longname/longlink.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longname = "".join(chunks)

    # The header right behind the data describes the real member;
    # run it through the regular processing.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # The resulting member starts where the longname header started
    # and receives the long string minus its NUL padding.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longname.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longname.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001856
def proc_sparse(self, tarinfo):
    """Handle a GNU sparse member: evaluate the sparse structs of the
       header and of any extended headers that follow it, and attach
       the resulting list of sections to tarinfo as tarinfo.sparse.
    """
    sections = _ringbuffer()

    def collect(block, start, count, lastpos, realpos):
        # Evaluate up to count 24 byte structs (two 12 byte numbers:
        # offset and numbytes) found in block from position start on.
        # The scan ends early at the first struct nti() rejects.
        # Returns the updated (lastpos, realpos) pair.
        pos = start
        for unused in xrange(count):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sections.append(_hole(lastpos, offset - lastpos))
            sections.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        return lastpos, realpos

    # The first header has room for 4 sparse structs, starting at
    # byte 386.
    header = tarinfo.buf
    lastpos, realpos = collect(header, 386, 4, long(0), long(0))

    isextended = ord(header[482])
    origsize = nti(header[483:495])

    # As long as the isextended flag is set, one more header block
    # with up to 21 structs follows; its own flag is byte 504.
    while isextended == 1:
        extra = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        lastpos, realpos = collect(extra, 0, 21, lastpos, realpos)
        isextended = ord(extra[504])

    # Whatever is missing up to the original size is one last hole.
    if lastpos < origsize:
        sections.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sections

    # The stored size is used to skip the data blocks before
    # tarinfo.size is replaced by the original file size.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    # Empty the prefix field so that next() does not use it as
    # part of the pathname.
    tarinfo.prefix = ""

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001917
1918 #--------------------------------------------------------------------------
1919 # Little helper methods:
1920
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    leftover = count % BLOCKSIZE
    if not leftover:
        # Already a whole number of blocks.
        return count
    return count - leftover + BLOCKSIZE
1929
1930 def _getmember(self, name, tarinfo=None):
1931 """Find an archive member by name from bottom to top.
1932 If tarinfo is given, it is used as the starting point.
1933 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001934 # Ensure that all members have been loaded.
1935 members = self.getmembers()
1936
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001937 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001938 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001939 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001940 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001941
1942 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001943 if name == members[i].name:
1944 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001945
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001946 def _load(self):
1947 """Read through the entire archive file and look for readable
1948 members.
1949 """
1950 while True:
1951 tarinfo = self.next()
1952 if tarinfo is None:
1953 break
1954 self._loaded = True
1955
1956 def _check(self, mode=None):
1957 """Check if TarFile is still open, and if the operation's mode
1958 corresponds to TarFile's mode.
1959 """
1960 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00001961 raise IOError("%s is closed" % self.__class__.__name__)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001962 if mode is not None and self._mode not in mode:
Georg Brandle4751e32006-05-18 06:11:19 +00001963 raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001964
1965 def __iter__(self):
1966 """Provide an iterator object.
1967 """
1968 if self._loaded:
1969 return iter(self.members)
1970 else:
1971 return TarIter(self)
1972
def _create_gnulong(self, name, type):
    """Add a GNU longname/longlink member to the TarFile. It is made
       up of an extended tar header whose size field holds the length
       of the null terminated name, followed by the data blocks that
       store this name.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # The extended header occupies one block.
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # The name follows, padded with NULs up to a full block.
    self.fileobj.write(payload)
    padded = self._block(header.size)
    padding = padded - header.size
    if padding > 0:
        self.fileobj.write(NUL * padding)
    self.offset += padded
1997
1998 def _dbg(self, level, msg):
1999 """Write debugging output to sys.stderr.
2000 """
2001 if level <= self.debug:
2002 print >> sys.stderr, msg
2003# class TarFile
2004
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create a TarIter object for tarfile.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator, which is the object itself.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not fully
           loaded, the member comes from TarFile's next() method; once
           that is exhausted the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances
            # getmembers() is called during iteration and loads the
            # whole archive. Continuing from self.index in the member
            # list keeps TarIter from stopping prematurely.
            if self.index >= len(archive.members):
                raise StopIteration
            tarinfo = archive.members[self.index]
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2040
2041# Helper classes for sparse file support
2042class _section:
2043 """Base class for _data and _hole.
2044 """
2045 def __init__(self, offset, size):
2046 self.offset = offset
2047 self.size = size
2048 def __contains__(self, offset):
2049 return self.offset <= offset < self.offset + self.size
2050
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it stores a realpos value.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2057
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section.
    """
2062
2063class _ringbuffer(list):
2064 """Ringbuffer class which increases performance
2065 over a regular list.
2066 """
2067 def __init__(self):
2068 self.idx = 0
2069 def find(self, offset):
2070 idx = self.idx
2071 while True:
2072 item = self[idx]
2073 if offset in item:
2074 break
2075 idx += 1
2076 if idx == len(self):
2077 idx = 0
2078 if idx == self.idx:
2079 # End of File
2080 return None
2081 self.idx = idx
2082 return item
2083
2084#---------------------------------------------
2085# zipfile compatible TarFile class
2086#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       ZipFile class from the standard module zipfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            # Give every member the attributes a ZipInfo would have.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        return [member.name for member in self.infolist()]
    def infolist(self):
        # Only members of a regular type are listed.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted but not used.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Copy the ZipInfo style attributes to their TarInfo names.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
2134
2135#--------------------
2136# exported functions
2137#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # open is the module level name, which is bound to TarFile.open
    # below. Only a TarError counts as "not a tar archive"; any other
    # exception is passed on to the caller.
    try:
        archive = open(name)
        archive.close()
    except TarError:
        return False
    else:
        return True

open = TarFile.open