blob: 14553a776d3e441aaa18c7603fe3763a98b0004c [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Thomas Wouters477c8d52006-05-27 19:21:47 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
Jack Jansencfc49022003-03-07 13:37:32 +000054if sys.platform == 'mac':
55 # This module needs work for MacOS9, especially in the area of pathname
56 # handling. In many places it is assumed a simple substitution of / by the
57 # local os.path.sep is good enough to convert pathnames, but this does not
58 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
59 raise ImportError, "tarfile does not work for platform==mac"
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
Guido van Rossum8f78fe92006-08-24 04:03:53 +000069from __builtin__ import open as _open # Since 'open' is TarFile.open
70
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000071#---------------------------------------------------------
72# tar constants
73#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE     # length of records (20 blocks)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1    # maximum size of a file (11 octal digits,
                                # i.e. 077777777777)

# Values of the one-character type field of a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = NUL                  # regular file (old-style, NUL type flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)
110
#---------------------------------------------------------
# Bits used in the mode field.  The literals are written in
# hexadecimal; the conventional octal value is given in
# each comment.
#---------------------------------------------------------
S_IFLNK = 0xA000        # 0120000  symbolic link
S_IFREG = 0x8000        # 0100000  regular file
S_IFBLK = 0x6000        # 0060000  block device
S_IFDIR = 0x4000        # 0040000  directory
S_IFCHR = 0x2000        # 0020000  character device
S_IFIFO = 0x1000        # 0010000  fifo

TSUID   = 0x800         # 04000    set UID on execution
TSGID   = 0x400         # 02000    set GID on execution
TSVTX   = 0x200         # 01000    reserved

TUREAD  = 0x100         # 0400     read by owner
TUWRITE = 0x080         # 0200     write by owner
TUEXEC  = 0x040         # 0100     execute/search by owner
TGREAD  = 0x020         # 0040     read by group
TGWRITE = 0x010         # 0020     write by group
TGEXEC  = 0x008         # 0010     execute/search by group
TOREAD  = 0x004         # 0004     read by other
TOWRITE = 0x002         # 0002     write by other
TOEXEC  = 0x001         # 0001     execute/search by other
134
135#---------------------------------------------------------
136# Some useful functions
137#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000138
def stn(s, length):
    """Fit the python string `s' into a field of exactly `length'
       characters: longer strings are truncated, shorter ones are
       padded on the right with NUL characters.
    """
    field = s[:length]
    return field + NUL * (length - len(field))
Thomas Wouters477c8d52006-05-27 19:21:47 +0000143
def nti(s):
    """Convert a number field to a python number.

       Two encodings are understood (see itn()): a string of octal
       digits terminated or padded by NULs and spaces, and the GNU
       base-256 form, which starts with a 0x80 (octal 0200) byte
       followed by the big-endian value.  An empty or blank field
       decodes as 0.  A field that is not valid octal raises
       ValueError.
    """
    # Slice instead of indexing so that an empty field is treated like
    # a blank one instead of raising IndexError.
    if s[:1] != chr(0x80):
        return int(s.rstrip(NUL + " ") or "0", 8)

    n = 0
    for char in s[1:]:
        n = (n << 8) + ord(char)
    return n
157
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field of `digits' characters.

       POSIX 1003.1-1988 requires numbers to be encoded as a string of
       octal digits followed by a null-byte, this allows values up to
       (8**(digits-1))-1.  GNU tar allows storing numbers greater than
       that if necessary.  A leading 0200 byte indicates this particular
       encoding, the following digits-1 bytes are a big-endian
       representation.  This allows values up to (256**(digits-1))-1.

       Raises ValueError if the number does not fit into the field:
       always when `posix' is true and the octal form is too small, and
       otherwise when a non-negative number is too large even for the
       GNU form.
    """
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.  The reinterpreted value is
        # deliberately cut down to the field width below.
        n = struct.unpack("L", struct.pack("l", n))[0]
    elif n >= 256 ** (digits - 1):
        # Storing only the low-order bytes would silently corrupt the
        # value, so refuse it like the POSIX branch does.
        raise ValueError("overflow in number field")

    # Collect the digits-1 payload bytes, least significant first.
    payload = []
    for i in range(digits - 1):
        payload.append(chr(n & 0xFF))
        n >>= 8
    payload.reverse()
    return chr(0x80) + "".join(payload)
184
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the member
       header in `buf'.

       Every byte of the 512 byte header is summed up, except for the
       eight bytes of the chksum field (offsets 148-155), which count
       as if they were spaces (8 * 32 = 256).  According to the GNU tar
       sources, some tars (Sun and NeXT) sum the bytes as signed chars,
       which gives a different result as soon as a byte has its high
       bit set, hence both sums are returned.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself.
    as_unsigned = struct.unpack("148B8x356B", header)
    as_signed = struct.unpack("148b8x356b", header)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000197
def copyfileobj(src, dst, length=None):
    """Copy `length' bytes from the file object `src' to the file
       object `dst'.  If `length' is None, everything up to the end of
       `src' is copied.  IOError is raised if `src' ends before
       `length' bytes could be read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly `count' bytes, a short read means premature EOF.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    full_blocks, tail = divmod(length, BUFSIZE)
    while full_blocks > 0:
        transfer(BUFSIZE)
        full_blocks -= 1
    if tail != 0:
        transfer(tail)
    return
222
# One entry per character of the mode string.  Each entry lists
# (bits, character) alternatives in order of precedence; the first
# alternative whose bits are all set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        matching = [char for bit, char in alternatives
                    if mode & bit == bit]
        if matching:
            chars.append(matching[0])
        else:
            # none of the alternatives applies to this position
            chars.append("-")
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000264
def normpath(path):
    """Normalize `path' like os.path.normpath() does, but always use
       "/" as the separator of the result, whatever os.sep is.
    """
    normalized = os.path.normpath(path)
    if os.sep != "/":
        normalized = normalized.replace(os.sep, "/")
    return normalized
269
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
285
286#---------------------------
287# internal stream interface
288#---------------------------
289class _LowLevelFile:
290 """Low-level file object. Supports reading and writing.
291 It is used instead of a regular file object for streaming
292 access.
293 """
294
295 def __init__(self, name, mode):
296 mode = {
297 "r": os.O_RDONLY,
298 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
299 }[mode]
300 if hasattr(os, "O_BINARY"):
301 mode |= os.O_BINARY
302 self.fd = os.open(name, mode)
303
304 def close(self):
305 os.close(self.fd)
306
307 def read(self, size):
308 return os.read(self.fd, size)
309
310 def write(self, s):
311 os.write(self.fd, s)
312
313class _Stream:
314 """Class that serves as an adapter between TarFile and
315 a stream-like object. The stream-like object only
316 needs to have a read() or write() method and is accessed
317 blockwise. Use of gzip or bzip2 compression is possible.
318 A stream-like object could be for example: sys.stdin,
319 sys.stdout, a socket, a tape device etc.
320
321 _Stream is intended to be used only internally.
322 """
323
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000324 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325 """Construct a _Stream object.
326 """
327 self._extfileobj = True
328 if fileobj is None:
329 fileobj = _LowLevelFile(name, mode)
330 self._extfileobj = False
331
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000332 if comptype == '*':
333 # Enable transparent compression detection for the
334 # stream interface
335 fileobj = _StreamProxy(fileobj)
336 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000337
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000338 self.name = name or ""
339 self.mode = mode
340 self.comptype = comptype
341 self.fileobj = fileobj
342 self.bufsize = bufsize
343 self.buf = ""
344 self.pos = 0L
345 self.closed = False
346
347 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000348 try:
349 import zlib
350 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000351 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000352 self.zlib = zlib
353 self.crc = zlib.crc32("")
354 if mode == "r":
355 self._init_read_gz()
356 else:
357 self._init_write_gz()
358
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000359 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000360 try:
361 import bz2
362 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000363 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000364 if mode == "r":
365 self.dbuf = ""
366 self.cmp = bz2.BZ2Decompressor()
367 else:
368 self.cmp = bz2.BZ2Compressor()
369
370 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000371 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000372 self.close()
373
374 def _init_write_gz(self):
375 """Initialize for writing with gzip compression.
376 """
377 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
378 -self.zlib.MAX_WBITS,
379 self.zlib.DEF_MEM_LEVEL,
380 0)
381 timestamp = struct.pack("<L", long(time.time()))
382 self.__write("\037\213\010\010%s\002\377" % timestamp)
383 if self.name.endswith(".gz"):
384 self.name = self.name[:-3]
385 self.__write(self.name + NUL)
386
387 def write(self, s):
388 """Write string s to the stream.
389 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000391 self.crc = self.zlib.crc32(s, self.crc)
392 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000393 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000394 s = self.cmp.compress(s)
395 self.__write(s)
396
397 def __write(self, s):
398 """Write string s to the stream if a whole new block
399 is ready to be written.
400 """
401 self.buf += s
402 while len(self.buf) > self.bufsize:
403 self.fileobj.write(self.buf[:self.bufsize])
404 self.buf = self.buf[self.bufsize:]
405
406 def close(self):
407 """Close the _Stream object. No operation should be
408 done on it afterwards.
409 """
410 if self.closed:
411 return
412
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000413 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000414 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000415
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000416 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000417 self.fileobj.write(self.buf)
418 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000419 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000420 # The native zlib crc is an unsigned 32-bit integer, but
421 # the Python wrapper implicitly casts that to a signed C
422 # long. So, on a 32-bit box self.crc may "look negative",
423 # while the same crc on a 64-bit box may "look positive".
424 # To avoid irksome warnings from the `struct` module, force
425 # it to look positive on all boxes.
426 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000427 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000428
429 if not self._extfileobj:
430 self.fileobj.close()
431
432 self.closed = True
433
434 def _init_read_gz(self):
435 """Initialize for reading a gzip compressed fileobj.
436 """
437 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
438 self.dbuf = ""
439
440 # taken from gzip.GzipFile with some alterations
441 if self.__read(2) != "\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000442 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000443 if self.__read(1) != "\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000444 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000445
446 flag = ord(self.__read(1))
447 self.__read(6)
448
449 if flag & 4:
450 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
451 self.read(xlen)
452 if flag & 8:
453 while True:
454 s = self.__read(1)
455 if not s or s == NUL:
456 break
457 if flag & 16:
458 while True:
459 s = self.__read(1)
460 if not s or s == NUL:
461 break
462 if flag & 2:
463 self.__read(2)
464
465 def tell(self):
466 """Return the stream's file pointer position.
467 """
468 return self.pos
469
470 def seek(self, pos=0):
471 """Set the stream's file pointer to pos. Negative seeking
472 is forbidden.
473 """
474 if pos - self.pos >= 0:
475 blocks, remainder = divmod(pos - self.pos, self.bufsize)
476 for i in xrange(blocks):
477 self.read(self.bufsize)
478 self.read(remainder)
479 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000480 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000481 return self.pos
482
483 def read(self, size=None):
484 """Return the next size number of bytes from the stream.
485 If size is not defined, return all bytes of the stream
486 up to EOF.
487 """
488 if size is None:
489 t = []
490 while True:
491 buf = self._read(self.bufsize)
492 if not buf:
493 break
494 t.append(buf)
495 buf = "".join(t)
496 else:
497 buf = self._read(size)
498 self.pos += len(buf)
499 return buf
500
501 def _read(self, size):
502 """Return size bytes from the stream.
503 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000504 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000505 return self.__read(size)
506
507 c = len(self.dbuf)
508 t = [self.dbuf]
509 while c < size:
510 buf = self.__read(self.bufsize)
511 if not buf:
512 break
513 buf = self.cmp.decompress(buf)
514 t.append(buf)
515 c += len(buf)
516 t = "".join(t)
517 self.dbuf = t[size:]
518 return t[:size]
519
520 def __read(self, size):
521 """Return size bytes from stream. If internal buffer is empty,
522 read another block from the stream.
523 """
524 c = len(self.buf)
525 t = [self.buf]
526 while c < size:
527 buf = self.fileobj.read(self.bufsize)
528 if not buf:
529 break
530 t.append(buf)
531 c += len(buf)
532 t = "".join(t)
533 self.buf = t[size:]
534 return t[:size]
535# class _Stream
536
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000537class _StreamProxy(object):
538 """Small proxy class that enables transparent compression
539 detection for the Stream interface (mode 'r|*').
540 """
541
542 def __init__(self, fileobj):
543 self.fileobj = fileobj
544 self.buf = self.fileobj.read(BLOCKSIZE)
545
546 def read(self, size):
547 self.read = self.fileobj.read
548 return self.buf
549
550 def getcomptype(self):
551 if self.buf.startswith("\037\213\010"):
552 return "gz"
553 if self.buf.startswith("BZh91"):
554 return "bz2"
555 return "tar"
556
557 def close(self):
558 self.fileobj.close()
559# class StreamProxy
560
Thomas Wouters477c8d52006-05-27 19:21:47 +0000561class _BZ2Proxy(object):
562 """Small proxy class that enables external file object
563 support for "r:bz2" and "w:bz2" modes. This is actually
564 a workaround for a limitation in bz2 module's BZ2File
565 class which (unlike gzip.GzipFile) has no support for
566 a file object argument.
567 """
568
569 blocksize = 16 * 1024
570
571 def __init__(self, fileobj, mode):
572 self.fileobj = fileobj
573 self.mode = mode
574 self.init()
575
576 def init(self):
577 import bz2
578 self.pos = 0
579 if self.mode == "r":
580 self.bz2obj = bz2.BZ2Decompressor()
581 self.fileobj.seek(0)
582 self.buf = ""
583 else:
584 self.bz2obj = bz2.BZ2Compressor()
585
586 def read(self, size):
587 b = [self.buf]
588 x = len(self.buf)
589 while x < size:
590 try:
591 raw = self.fileobj.read(self.blocksize)
592 data = self.bz2obj.decompress(raw)
593 b.append(data)
594 except EOFError:
595 break
596 x += len(data)
597 self.buf = "".join(b)
598
599 buf = self.buf[:size]
600 self.buf = self.buf[size:]
601 self.pos += len(buf)
602 return buf
603
604 def seek(self, pos):
605 if pos < self.pos:
606 self.init()
607 self.read(pos - self.pos)
608
609 def tell(self):
610 return self.pos
611
612 def write(self, data):
613 self.pos += len(data)
614 raw = self.bz2obj.compress(data)
615 self.fileobj.write(raw)
616
617 def close(self):
618 if self.mode == "w":
619 raw = self.bz2obj.flush()
620 self.fileobj.write(raw)
621 self.fileobj.close()
622# class _BZ2Proxy
623
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000624#------------------------
625# Extraction file object
626#------------------------
627class ExFileObject(object):
628 """File-like object for reading an archive member.
629 Is returned by TarFile.extractfile(). Support for
630 sparse files included.
631 """
632
633 def __init__(self, tarfile, tarinfo):
634 self.fileobj = tarfile.fileobj
635 self.name = tarinfo.name
636 self.mode = "r"
637 self.closed = False
638 self.offset = tarinfo.offset_data
639 self.size = tarinfo.size
640 self.pos = 0L
641 self.linebuffer = ""
642 if tarinfo.issparse():
643 self.sparse = tarinfo.sparse
644 self.read = self._readsparse
645 else:
646 self.read = self._readnormal
647
648 def __read(self, size):
649 """Overloadable read method.
650 """
651 return self.fileobj.read(size)
652
653 def readline(self, size=-1):
654 """Read a line with approx. size. If size is negative,
655 read a whole line. readline() and read() must not
656 be mixed up (!).
657 """
658 if size < 0:
659 size = sys.maxint
660
661 nl = self.linebuffer.find("\n")
662 if nl >= 0:
663 nl = min(nl, size)
664 else:
665 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000666 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000667 buf = self.read(min(size, 100))
668 if not buf:
669 break
670 self.linebuffer += buf
671 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000672 nl = self.linebuffer.find("\n")
673 if nl == -1:
674 s = self.linebuffer
675 self.linebuffer = ""
676 return s
677 buf = self.linebuffer[:nl]
678 self.linebuffer = self.linebuffer[nl + 1:]
679 while buf[-1:] == "\r":
680 buf = buf[:-1]
681 return buf + "\n"
682
683 def readlines(self):
684 """Return a list with all (following) lines.
685 """
686 result = []
687 while True:
688 line = self.readline()
689 if not line: break
690 result.append(line)
691 return result
692
693 def _readnormal(self, size=None):
694 """Read operation for regular files.
695 """
696 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000697 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000698 self.fileobj.seek(self.offset + self.pos)
699 bytesleft = self.size - self.pos
700 if size is None:
701 bytestoread = bytesleft
702 else:
703 bytestoread = min(size, bytesleft)
704 self.pos += bytestoread
705 return self.__read(bytestoread)
706
707 def _readsparse(self, size=None):
708 """Read operation for sparse files.
709 """
710 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000711 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000712
713 if size is None:
714 size = self.size - self.pos
715
716 data = []
717 while size > 0:
718 buf = self._readsparsesection(size)
719 if not buf:
720 break
721 size -= len(buf)
722 data.append(buf)
723 return "".join(data)
724
725 def _readsparsesection(self, size):
726 """Read a single section of a sparse file.
727 """
728 section = self.sparse.find(self.pos)
729
730 if section is None:
731 return ""
732
733 toread = min(size, section.offset + section.size - self.pos)
734 if isinstance(section, _data):
735 realpos = section.realpos + self.pos - section.offset
736 self.pos += toread
737 self.fileobj.seek(self.offset + realpos)
738 return self.__read(toread)
739 else:
740 self.pos += toread
741 return NUL * toread
742
743 def tell(self):
744 """Return the current file position.
745 """
746 return self.pos
747
748 def seek(self, pos, whence=0):
749 """Seek to a position in the file.
750 """
751 self.linebuffer = ""
752 if whence == 0:
753 self.pos = min(max(pos, 0), self.size)
754 if whence == 1:
755 if pos < 0:
756 self.pos = max(self.pos + pos, 0)
757 else:
758 self.pos = min(self.pos + pos, self.size)
759 if whence == 2:
760 self.pos = max(min(self.size + pos, self.size), 0)
761
762 def close(self):
763 """Close the file object.
764 """
765 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000766
767 def __iter__(self):
768 """Get an iterator over the file object.
769 """
770 if self.closed:
771 raise ValueError("I/O operation on closed file")
772 return self
773
774 def next(self):
775 """Get the next item from the file iterator.
776 """
777 result = self.readline()
778 if not result:
779 raise StopIteration
780 return result
Tim Peterseba28be2005-03-28 01:08:02 +0000781
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000782#class ExFileObject
783
784#------------------
785# Exported Classes
786#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name (dirnames must end with '/')
        # file permissions, rw-rw-rw- (octal 0666)
        self.mode = (TUREAD | TUWRITE | TGREAD | TGWRITE |
                     TOREAD | TOWRITE)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises ValueError if the buffer does not have the size of a
           block ("truncated header"), consists of NULs only ("empty
           header"), contains a malformed number field or fails the
           checksum test ("invalid header").
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        def text(start, stop):
            # String fields are NUL-terminated: everything behind the
            # first NUL is padding (or garbage) and not part of the
            # value.
            field = buf[start:stop]
            end = field.find(NUL)
            if end != -1:
                field = field[:end]
            return field

        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = text(0, 100)
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = text(157, 257)
        tarinfo.uname = text(265, 297)
        tarinfo.gname = text(297, 329)
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        prefix = text(345, 500)

        # For sparse members the name is not combined with the prefix
        # field.
        if prefix and not tarinfo.issparse():
            tarinfo.name = prefix + "/" + tarinfo.name

        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header as a string of 512 byte blocks.

           If posix is true, ValueError is raised when the size, the
           name or the linkname do not fit into the header.  Otherwise
           long names and linknames are stored in GNU longname and
           longlink blocks which precede the header in the result.
        """
        buf = ""
        typeflag = self.type
        prefix = ""

        if self.name.endswith("/"):
            typeflag = DIRTYPE

        if typeflag in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            # Prevent "././@LongLink" from being normalized.
            name = self.name
        else:
            name = normpath(self.name)

        if typeflag == DIRTYPE:
            # directories should end with '/'
            name += "/"

        # The linkname is stored exactly as given.

        if posix:
            if self.size > MAXSIZE_MEMBER:
                raise ValueError("file is too large (>= 8 GB)")

            if len(self.linkname) > LENGTH_LINK:
                raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))

            if len(name) > LENGTH_NAME:
                # Split the name at a slash, so that the leading part
                # goes into the prefix field and the rest into the
                # name field.
                prefix = name[:LENGTH_PREFIX + 1]
                while prefix and prefix[-1] != "/":
                    prefix = prefix[:-1]

                name = name[len(prefix):]
                prefix = prefix[:-1]

                if not prefix or len(name) > LENGTH_NAME:
                    raise ValueError("name is too long")

        else:
            if len(self.linkname) > LENGTH_LINK:
                buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)

            if len(name) > LENGTH_NAME:
                buf += self._create_gnulong(name, GNUTYPE_LONGNAME)

        parts = [
            stn(name, 100),
            itn(self.mode & 0xFFF, 8, posix),   # octal 07777
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            " " * 8,    # checksum field, counted as eight spaces
            typeflag,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(prefix, 155)
        ]

        # struct.pack() pads the 500 bytes of fields to a whole block.
        buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Put six octal digits and a NUL at offset 148 of the last
        # block (512 - 364), the eighth byte of the field stays a space.
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        self.buf = buf
        return buf

    def _create_gnulong(self, name, type):
        """Create a GNU longname/longlink header from name.
           It consists of an extended tar header, with the length
           of the longname as size, followed by data blocks,
           which contain the longname as a null terminated string.
        """
        name += NUL

        tarinfo = self.__class__()
        tarinfo.name = "././@LongLink"
        tarinfo.type = type
        tarinfo.mode = 0
        tarinfo.size = len(name)

        # create extended header
        buf = tarinfo.tobuf()
        # create name blocks
        buf += name
        blocks, remainder = divmod(len(name), BLOCKSIZE)
        if remainder > 0:
            # pad the last block with NULs
            buf += (BLOCKSIZE - remainder) * NUL
        return buf

    # Predicates on the type of the member.
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
971
972class TarFile(object):
973 """The TarFile Class provides an interface to tar archives.
974 """
975
976 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
977
978 dereference = False # If true, add content of linked file to the
979 # tar file, else the link.
980
981 ignore_zeros = False # If true, skips empty or invalid blocks and
982 # continues processing.
983
984 errorlevel = 0 # If 0, fatal errors only appear in debug
985 # messages (if debug >= 0). If > 0, errors
986 # are passed to the caller as exceptions.
987
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000988 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000989 # archives (no GNU extensions!)
990
991 fileobject = ExFileObject
992
993 def __init__(self, name=None, mode="r", fileobj=None):
994 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
995 read from an existing archive, 'a' to append data to an existing
996 file or 'w' to create a new file overwriting an existing one. `mode'
997 defaults to 'r'.
998 If `fileobj' is given, it is used for reading or writing data. If it
999 can be determined, `mode' is overridden by `fileobj's mode.
1000 `fileobj' is not closed, when TarFile is closed.
1001 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001002 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001003
1004 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001005 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001006 self._mode = mode
1007 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1008
1009 if not fileobj:
Guido van Rossum8f78fe92006-08-24 04:03:53 +00001010 fileobj = _open(self.name, self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001011 self._extfileobj = False
1012 else:
1013 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001014 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001015 if hasattr(fileobj, "mode"):
1016 self.mode = fileobj.mode
1017 self._extfileobj = True
1018 self.fileobj = fileobj
1019
1020 # Init datastructures
Thomas Wouters477c8d52006-05-27 19:21:47 +00001021 self.closed = False
1022 self.members = [] # list of members as TarInfo objects
1023 self._loaded = False # flag if all members have been read
1024 self.offset = 0L # current position in the archive file
1025 self.inodes = {} # dictionary caching the inodes of
1026 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001027
1028 if self._mode == "r":
1029 self.firstmember = None
1030 self.firstmember = self.next()
1031
1032 if self._mode == "a":
1033 # Move to the end of the archive,
1034 # before the first empty block.
1035 self.firstmember = None
1036 while True:
1037 try:
1038 tarinfo = self.next()
1039 except ReadError:
1040 self.fileobj.seek(0)
1041 break
1042 if tarinfo is None:
1043 self.fileobj.seek(- BLOCKSIZE, 1)
1044 break
1045
1046 if self._mode in "aw":
1047 self._loaded = True
1048
1049 #--------------------------------------------------------------------------
1050 # Below are the classmethods which act as alternate constructors to the
1051 # TarFile class. The open() method is the only one that is needed for
1052 # public use; it is the "super"-constructor and is able to select an
1053 # adequate "sub"-constructor for a particular compression using the mapping
1054 # from OPEN_METH.
1055 #
1056 # This concept allows one to subclass TarFile without losing the comfort of
1057 # the super-constructor. A sub-constructor is registered and made available
1058 # by adding it to the mapping in OPEN_METH.
1059
Guido van Rossum75b64e62005-01-16 00:16:11 +00001060 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001061 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
1062 """Open a tar archive for reading, writing or appending. Return
1063 an appropriate TarFile class.
1064
1065 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001066 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001067 'r:' open for reading exclusively uncompressed
1068 'r:gz' open for reading with gzip compression
1069 'r:bz2' open for reading with bzip2 compression
1070 'a' or 'a:' open for appending
1071 'w' or 'w:' open for writing without compression
1072 'w:gz' open for writing with gzip compression
1073 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001074
1075 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001076 'r|' open an uncompressed stream of tar blocks for reading
1077 'r|gz' open a gzip compressed stream of tar blocks
1078 'r|bz2' open a bzip2 compressed stream of tar blocks
1079 'w|' open an uncompressed stream for writing
1080 'w|gz' open a gzip compressed stream for writing
1081 'w|bz2' open a bzip2 compressed stream for writing
1082 """
1083
1084 if not name and not fileobj:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001085 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001086
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001087 if mode in ("r", "r:*"):
1088 # Find out which *open() is appropriate for opening the file.
1089 for comptype in cls.OPEN_METH:
1090 func = getattr(cls, cls.OPEN_METH[comptype])
1091 try:
1092 return func(name, "r", fileobj)
1093 except (ReadError, CompressionError):
1094 continue
Thomas Wouters477c8d52006-05-27 19:21:47 +00001095 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001096
1097 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001098 filemode, comptype = mode.split(":", 1)
1099 filemode = filemode or "r"
1100 comptype = comptype or "tar"
1101
1102 # Select the *open() function according to
1103 # given compression.
1104 if comptype in cls.OPEN_METH:
1105 func = getattr(cls, cls.OPEN_METH[comptype])
1106 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001107 raise CompressionError("unknown compression type %r" % comptype)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001108 return func(name, filemode, fileobj)
1109
1110 elif "|" in mode:
1111 filemode, comptype = mode.split("|", 1)
1112 filemode = filemode or "r"
1113 comptype = comptype or "tar"
1114
1115 if filemode not in "rw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001116 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001117
1118 t = cls(name, filemode,
1119 _Stream(name, filemode, comptype, fileobj, bufsize))
1120 t._extfileobj = False
1121 return t
1122
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001123 elif mode in "aw":
1124 return cls.taropen(name, mode, fileobj)
1125
Thomas Wouters477c8d52006-05-27 19:21:47 +00001126 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001127
Guido van Rossum75b64e62005-01-16 00:16:11 +00001128 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001129 def taropen(cls, name, mode="r", fileobj=None):
1130 """Open uncompressed tar archive name for reading or writing.
1131 """
1132 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001133 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001134 return cls(name, mode, fileobj)
1135
Guido van Rossum75b64e62005-01-16 00:16:11 +00001136 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001137 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
1138 """Open gzip compressed tar archive name for reading or writing.
1139 Appending is not allowed.
1140 """
1141 if len(mode) > 1 or mode not in "rw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001142 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001143
1144 try:
1145 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001146 gzip.GzipFile
1147 except (ImportError, AttributeError):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001148 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001149
1150 pre, ext = os.path.splitext(name)
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001151 pre = os.path.basename(pre)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001152 if ext == ".tgz":
1153 ext = ".tar"
1154 if ext == ".gz":
1155 ext = ""
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001156 tarname = pre + ext
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001157
1158 if fileobj is None:
Guido van Rossum8f78fe92006-08-24 04:03:53 +00001159 fileobj = _open(name, mode + "b")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001160
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001161 if mode != "r":
1162 name = tarname
1163
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001164 try:
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001165 t = cls.taropen(tarname, mode,
1166 gzip.GzipFile(name, mode, compresslevel, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001167 )
1168 except IOError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001169 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001170 t._extfileobj = False
1171 return t
1172
Guido van Rossum75b64e62005-01-16 00:16:11 +00001173 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001174 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
1175 """Open bzip2 compressed tar archive name for reading or writing.
1176 Appending is not allowed.
1177 """
1178 if len(mode) > 1 or mode not in "rw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001179 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001180
1181 try:
1182 import bz2
1183 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001184 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001185
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001186 pre, ext = os.path.splitext(name)
1187 pre = os.path.basename(pre)
1188 if ext == ".tbz2":
1189 ext = ".tar"
1190 if ext == ".bz2":
1191 ext = ""
1192 tarname = pre + ext
1193
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001194 if fileobj is not None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001195 fileobj = _BZ2Proxy(fileobj, mode)
1196 else:
1197 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001198
1199 try:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001200 t = cls.taropen(tarname, mode, fileobj)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001201 except IOError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001202 raise ReadError("not a bzip2 file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001203 t._extfileobj = False
1204 return t
1205
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001206 # All *open() methods are registered here.
1207 OPEN_METH = {
1208 "tar": "taropen", # uncompressed tar
1209 "gz": "gzopen", # gzip compressed tar
1210 "bz2": "bz2open" # bzip2 compressed tar
1211 }
1212
1213 #--------------------------------------------------------------------------
1214 # The public methods which TarFile provides:
1215
1216 def close(self):
1217 """Close the TarFile. In write-mode, two finishing zero blocks are
1218 appended to the archive.
1219 """
1220 if self.closed:
1221 return
1222
1223 if self._mode in "aw":
1224 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1225 self.offset += (BLOCKSIZE * 2)
1226 # fill up the end with zero-blocks
1227 # (like option -b20 for tar does)
1228 blocks, remainder = divmod(self.offset, RECORDSIZE)
1229 if remainder > 0:
1230 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1231
1232 if not self._extfileobj:
1233 self.fileobj.close()
1234 self.closed = True
1235
1236 def getmember(self, name):
1237 """Return a TarInfo object for member `name'. If `name' can not be
1238 found in the archive, KeyError is raised. If a member occurs more
1239 than once in the archive, its last occurence is assumed to be the
1240 most up-to-date version.
1241 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001242 tarinfo = self._getmember(name)
1243 if tarinfo is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001244 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001245 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001246
1247 def getmembers(self):
1248 """Return the members of the archive as a list of TarInfo objects. The
1249 list has the same order as the members in the archive.
1250 """
1251 self._check()
1252 if not self._loaded: # if we want to obtain a list of
1253 self._load() # all members, we first have to
1254 # scan the whole archive.
1255 return self.members
1256
1257 def getnames(self):
1258 """Return the members of the archive as a list of their names. It has
1259 the same order as the list returned by getmembers().
1260 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001261 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001262
1263 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1264 """Create a TarInfo object for either the file `name' or the file
1265 object `fileobj' (using os.fstat on its file descriptor). You can
1266 modify some of the TarInfo's attributes before you add it using
1267 addfile(). If given, `arcname' specifies an alternative name for the
1268 file in the archive.
1269 """
1270 self._check("aw")
1271
1272 # When fileobj is given, replace name by
1273 # fileobj's real name.
1274 if fileobj is not None:
1275 name = fileobj.name
1276
1277 # Building the name of the member in the archive.
1278 # Backward slashes are converted to forward slashes,
1279 # Absolute paths are turned to relative paths.
1280 if arcname is None:
1281 arcname = name
1282 arcname = normpath(arcname)
1283 drv, arcname = os.path.splitdrive(arcname)
1284 while arcname[0:1] == "/":
1285 arcname = arcname[1:]
1286
1287 # Now, fill the TarInfo object with
1288 # information specific for the file.
1289 tarinfo = TarInfo()
1290
1291 # Use os.stat or os.lstat, depending on platform
1292 # and if symlinks shall be resolved.
1293 if fileobj is None:
1294 if hasattr(os, "lstat") and not self.dereference:
1295 statres = os.lstat(name)
1296 else:
1297 statres = os.stat(name)
1298 else:
1299 statres = os.fstat(fileobj.fileno())
1300 linkname = ""
1301
1302 stmd = statres.st_mode
1303 if stat.S_ISREG(stmd):
1304 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001305 if not self.dereference and \
1306 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001307 # Is it a hardlink to an already
1308 # archived file?
1309 type = LNKTYPE
1310 linkname = self.inodes[inode]
1311 else:
1312 # The inode is added only if its valid.
1313 # For win32 it is always 0.
1314 type = REGTYPE
1315 if inode[0]:
1316 self.inodes[inode] = arcname
1317 elif stat.S_ISDIR(stmd):
1318 type = DIRTYPE
1319 if arcname[-1:] != "/":
1320 arcname += "/"
1321 elif stat.S_ISFIFO(stmd):
1322 type = FIFOTYPE
1323 elif stat.S_ISLNK(stmd):
1324 type = SYMTYPE
1325 linkname = os.readlink(name)
1326 elif stat.S_ISCHR(stmd):
1327 type = CHRTYPE
1328 elif stat.S_ISBLK(stmd):
1329 type = BLKTYPE
1330 else:
1331 return None
1332
1333 # Fill the TarInfo object with all
1334 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001335 tarinfo.name = arcname
1336 tarinfo.mode = stmd
1337 tarinfo.uid = statres.st_uid
1338 tarinfo.gid = statres.st_gid
1339 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001340 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001341 else:
1342 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001343 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001344 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001345 tarinfo.linkname = linkname
1346 if pwd:
1347 try:
1348 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1349 except KeyError:
1350 pass
1351 if grp:
1352 try:
1353 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1354 except KeyError:
1355 pass
1356
1357 if type in (CHRTYPE, BLKTYPE):
1358 if hasattr(os, "major") and hasattr(os, "minor"):
1359 tarinfo.devmajor = os.major(statres.st_rdev)
1360 tarinfo.devminor = os.minor(statres.st_rdev)
1361 return tarinfo
1362
1363 def list(self, verbose=True):
1364 """Print a table of contents to sys.stdout. If `verbose' is False, only
1365 the names of the members are printed. If it is True, an `ls -l'-like
1366 output is produced.
1367 """
1368 self._check()
1369
1370 for tarinfo in self:
1371 if verbose:
1372 print filemode(tarinfo.mode),
1373 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1374 tarinfo.gname or tarinfo.gid),
1375 if tarinfo.ischr() or tarinfo.isblk():
1376 print "%10s" % ("%d,%d" \
1377 % (tarinfo.devmajor, tarinfo.devminor)),
1378 else:
1379 print "%10d" % tarinfo.size,
1380 print "%d-%02d-%02d %02d:%02d:%02d" \
1381 % time.localtime(tarinfo.mtime)[:6],
1382
1383 print tarinfo.name,
1384
1385 if verbose:
1386 if tarinfo.issym():
1387 print "->", tarinfo.linkname,
1388 if tarinfo.islnk():
1389 print "link to", tarinfo.linkname,
1390 print
1391
1392 def add(self, name, arcname=None, recursive=True):
1393 """Add the file `name' to the archive. `name' may be any type of file
1394 (directory, fifo, symbolic link, etc.). If given, `arcname'
1395 specifies an alternative name for the file in the archive.
1396 Directories are added recursively by default. This can be avoided by
1397 setting `recursive' to False.
1398 """
1399 self._check("aw")
1400
1401 if arcname is None:
1402 arcname = name
1403
1404 # Skip if somebody tries to archive the archive...
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001405 if self.name is not None \
1406 and os.path.abspath(name) == os.path.abspath(self.name):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001407 self._dbg(2, "tarfile: Skipped %r" % name)
1408 return
1409
1410 # Special case: The user wants to add the current
1411 # working directory.
1412 if name == ".":
1413 if recursive:
1414 if arcname == ".":
1415 arcname = ""
1416 for f in os.listdir("."):
1417 self.add(f, os.path.join(arcname, f))
1418 return
1419
1420 self._dbg(1, name)
1421
1422 # Create a TarInfo object from the file.
1423 tarinfo = self.gettarinfo(name, arcname)
1424
1425 if tarinfo is None:
1426 self._dbg(1, "tarfile: Unsupported type %r" % name)
1427 return
1428
1429 # Append the tar header and data to the archive.
1430 if tarinfo.isreg():
Guido van Rossum8f78fe92006-08-24 04:03:53 +00001431 f = _open(name, "rb")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001432 self.addfile(tarinfo, f)
1433 f.close()
1434
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001435 elif tarinfo.isdir():
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001436 self.addfile(tarinfo)
1437 if recursive:
1438 for f in os.listdir(name):
1439 self.add(os.path.join(name, f), os.path.join(arcname, f))
1440
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001441 else:
1442 self.addfile(tarinfo)
1443
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001444 def addfile(self, tarinfo, fileobj=None):
1445 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1446 given, tarinfo.size bytes are read from it and added to the archive.
1447 You can create TarInfo objects using gettarinfo().
1448 On Windows platforms, `fileobj' should always be opened with mode
1449 'rb' to avoid irritation about the file size.
1450 """
1451 self._check("aw")
1452
Thomas Wouters89f507f2006-12-13 04:49:30 +00001453 tarinfo = copy.copy(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001454
Thomas Wouters89f507f2006-12-13 04:49:30 +00001455 buf = tarinfo.tobuf(self.posix)
1456 self.fileobj.write(buf)
1457 self.offset += len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001458
1459 # If there's data to follow, append it.
1460 if fileobj is not None:
1461 copyfileobj(fileobj, self.fileobj, tarinfo.size)
1462 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1463 if remainder > 0:
1464 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1465 blocks += 1
1466 self.offset += blocks * BLOCKSIZE
1467
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001468 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001469
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001470 def extractall(self, path=".", members=None):
1471 """Extract all members from the archive to the current working
1472 directory and set owner, modification time and permissions on
1473 directories afterwards. `path' specifies a different directory
1474 to extract to. `members' is optional and must be a subset of the
1475 list returned by getmembers().
1476 """
1477 directories = []
1478
1479 if members is None:
1480 members = self
1481
1482 for tarinfo in members:
1483 if tarinfo.isdir():
1484 # Extract directory with a safe mode, so that
1485 # all files below can be extracted as well.
1486 try:
1487 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1488 except EnvironmentError:
1489 pass
1490 directories.append(tarinfo)
1491 else:
1492 self.extract(tarinfo, path)
1493
1494 # Reverse sort directories.
1495 directories.sort(lambda a, b: cmp(a.name, b.name))
1496 directories.reverse()
1497
1498 # Set correct owner, mtime and filemode on directories.
1499 for tarinfo in directories:
1500 path = os.path.join(path, tarinfo.name)
1501 try:
1502 self.chown(tarinfo, path)
1503 self.utime(tarinfo, path)
1504 self.chmod(tarinfo, path)
1505 except ExtractError, e:
1506 if self.errorlevel > 1:
1507 raise
1508 else:
1509 self._dbg(1, "tarfile: %s" % e)
1510
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001511 def extract(self, member, path=""):
1512 """Extract a member from the archive to the current working directory,
1513 using its full name. Its file information is extracted as accurately
1514 as possible. `member' may be a filename or a TarInfo object. You can
1515 specify a different directory using `path'.
1516 """
1517 self._check("r")
1518
1519 if isinstance(member, TarInfo):
1520 tarinfo = member
1521 else:
1522 tarinfo = self.getmember(member)
1523
Neal Norwitza4f651a2004-07-20 22:07:44 +00001524 # Prepare the link target for makelink().
1525 if tarinfo.islnk():
1526 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1527
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001528 try:
1529 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1530 except EnvironmentError, e:
1531 if self.errorlevel > 0:
1532 raise
1533 else:
1534 if e.filename is None:
1535 self._dbg(1, "tarfile: %s" % e.strerror)
1536 else:
1537 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1538 except ExtractError, e:
1539 if self.errorlevel > 1:
1540 raise
1541 else:
1542 self._dbg(1, "tarfile: %s" % e)
1543
1544 def extractfile(self, member):
1545 """Extract a member from the archive as a file object. `member' may be
1546 a filename or a TarInfo object. If `member' is a regular file, a
1547 file-like object is returned. If `member' is a link, a file-like
1548 object is constructed from the link's target. If `member' is none of
1549 the above, None is returned.
1550 The file-like object is read-only and provides the following
1551 methods: read(), readline(), readlines(), seek() and tell()
1552 """
1553 self._check("r")
1554
1555 if isinstance(member, TarInfo):
1556 tarinfo = member
1557 else:
1558 tarinfo = self.getmember(member)
1559
1560 if tarinfo.isreg():
1561 return self.fileobject(self, tarinfo)
1562
1563 elif tarinfo.type not in SUPPORTED_TYPES:
1564 # If a member's type is unknown, it is treated as a
1565 # regular file.
1566 return self.fileobject(self, tarinfo)
1567
1568 elif tarinfo.islnk() or tarinfo.issym():
1569 if isinstance(self.fileobj, _Stream):
1570 # A small but ugly workaround for the case that someone tries
1571 # to extract a (sym)link as a file-object from a non-seekable
1572 # stream of tar blocks.
Thomas Wouters477c8d52006-05-27 19:21:47 +00001573 raise StreamError("cannot extract (sym)link as file object")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001574 else:
Georg Brandl7eb4b7d2005-07-22 21:49:32 +00001575 # A (sym)link's file object is its target's file object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001576 return self.extractfile(self._getmember(tarinfo.linkname,
1577 tarinfo))
1578 else:
1579 # If there's no data associated with the member (directory, chrdev,
1580 # blkdev, etc.), return None instead of a file object.
1581 return None
1582
1583 def _extract_member(self, tarinfo, targetpath):
1584 """Extract the TarInfo object tarinfo to a physical
1585 file called targetpath.
1586 """
1587 # Fetch the TarInfo object for the given name
1588 # and build the destination pathname, replacing
1589 # forward slashes to platform specific separators.
1590 if targetpath[-1:] == "/":
1591 targetpath = targetpath[:-1]
1592 targetpath = os.path.normpath(targetpath)
1593
1594 # Create all upper directories.
1595 upperdirs = os.path.dirname(targetpath)
1596 if upperdirs and not os.path.exists(upperdirs):
1597 ti = TarInfo()
1598 ti.name = upperdirs
1599 ti.type = DIRTYPE
1600 ti.mode = 0777
1601 ti.mtime = tarinfo.mtime
1602 ti.uid = tarinfo.uid
1603 ti.gid = tarinfo.gid
1604 ti.uname = tarinfo.uname
1605 ti.gname = tarinfo.gname
1606 try:
1607 self._extract_member(ti, ti.name)
1608 except:
1609 pass
1610
1611 if tarinfo.islnk() or tarinfo.issym():
1612 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1613 else:
1614 self._dbg(1, tarinfo.name)
1615
1616 if tarinfo.isreg():
1617 self.makefile(tarinfo, targetpath)
1618 elif tarinfo.isdir():
1619 self.makedir(tarinfo, targetpath)
1620 elif tarinfo.isfifo():
1621 self.makefifo(tarinfo, targetpath)
1622 elif tarinfo.ischr() or tarinfo.isblk():
1623 self.makedev(tarinfo, targetpath)
1624 elif tarinfo.islnk() or tarinfo.issym():
1625 self.makelink(tarinfo, targetpath)
1626 elif tarinfo.type not in SUPPORTED_TYPES:
1627 self.makeunknown(tarinfo, targetpath)
1628 else:
1629 self.makefile(tarinfo, targetpath)
1630
1631 self.chown(tarinfo, targetpath)
1632 if not tarinfo.issym():
1633 self.chmod(tarinfo, targetpath)
1634 self.utime(tarinfo, targetpath)
1635
1636 #--------------------------------------------------------------------------
1637 # Below are the different file methods. They are called via
1638 # _extract_member() when extract() is called. They can be replaced in a
1639 # subclass to implement other functionality.
1640
1641 def makedir(self, tarinfo, targetpath):
1642 """Make a directory called targetpath.
1643 """
1644 try:
1645 os.mkdir(targetpath)
1646 except EnvironmentError, e:
1647 if e.errno != errno.EEXIST:
1648 raise
1649
1650 def makefile(self, tarinfo, targetpath):
1651 """Make a file called targetpath.
1652 """
1653 source = self.extractfile(tarinfo)
Guido van Rossum8f78fe92006-08-24 04:03:53 +00001654 target = _open(targetpath, "wb")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001655 copyfileobj(source, target)
1656 source.close()
1657 target.close()
1658
1659 def makeunknown(self, tarinfo, targetpath):
1660 """Make a file from a TarInfo object with an unknown type
1661 at targetpath.
1662 """
1663 self.makefile(tarinfo, targetpath)
1664 self._dbg(1, "tarfile: Unknown file type %r, " \
1665 "extracted as regular file." % tarinfo.type)
1666
1667 def makefifo(self, tarinfo, targetpath):
1668 """Make a fifo called targetpath.
1669 """
1670 if hasattr(os, "mkfifo"):
1671 os.mkfifo(targetpath)
1672 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001673 raise ExtractError("fifo not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001674
1675 def makedev(self, tarinfo, targetpath):
1676 """Make a character or block device called targetpath.
1677 """
1678 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001679 raise ExtractError("special devices not supported by system")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001680
1681 mode = tarinfo.mode
1682 if tarinfo.isblk():
1683 mode |= stat.S_IFBLK
1684 else:
1685 mode |= stat.S_IFCHR
1686
1687 os.mknod(targetpath, mode,
1688 os.makedev(tarinfo.devmajor, tarinfo.devminor))
1689
1690 def makelink(self, tarinfo, targetpath):
1691 """Make a (symbolic) link called targetpath. If it cannot be created
1692 (platform limitation), we try to make a copy of the referenced file
1693 instead of a link.
1694 """
1695 linkpath = tarinfo.linkname
1696 try:
1697 if tarinfo.issym():
1698 os.symlink(linkpath, targetpath)
1699 else:
Neal Norwitza4f651a2004-07-20 22:07:44 +00001700 # See extract().
1701 os.link(tarinfo._link_target, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001702 except AttributeError:
1703 if tarinfo.issym():
1704 linkpath = os.path.join(os.path.dirname(tarinfo.name),
1705 linkpath)
1706 linkpath = normpath(linkpath)
1707
1708 try:
1709 self._extract_member(self.getmember(linkpath), targetpath)
1710 except (EnvironmentError, KeyError), e:
1711 linkpath = os.path.normpath(linkpath)
1712 try:
1713 shutil.copy2(linkpath, targetpath)
1714 except EnvironmentError, e:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001715 raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001716
1717 def chown(self, tarinfo, targetpath):
1718 """Set owner of targetpath according to tarinfo.
1719 """
1720 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1721 # We have to be root to do so.
1722 try:
1723 g = grp.getgrnam(tarinfo.gname)[2]
1724 except KeyError:
1725 try:
1726 g = grp.getgrgid(tarinfo.gid)[2]
1727 except KeyError:
1728 g = os.getgid()
1729 try:
1730 u = pwd.getpwnam(tarinfo.uname)[2]
1731 except KeyError:
1732 try:
1733 u = pwd.getpwuid(tarinfo.uid)[2]
1734 except KeyError:
1735 u = os.getuid()
1736 try:
1737 if tarinfo.issym() and hasattr(os, "lchown"):
1738 os.lchown(targetpath, u, g)
1739 else:
Andrew MacIntyre7970d202003-02-19 12:51:34 +00001740 if sys.platform != "os2emx":
1741 os.chown(targetpath, u, g)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001742 except EnvironmentError, e:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001743 raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001744
1745 def chmod(self, tarinfo, targetpath):
1746 """Set file permissions of targetpath according to tarinfo.
1747 """
Jack Jansen834eff62003-03-07 12:47:06 +00001748 if hasattr(os, 'chmod'):
1749 try:
1750 os.chmod(targetpath, tarinfo.mode)
1751 except EnvironmentError, e:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001752 raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001753
1754 def utime(self, tarinfo, targetpath):
1755 """Set modification time of targetpath according to tarinfo.
1756 """
Jack Jansen834eff62003-03-07 12:47:06 +00001757 if not hasattr(os, 'utime'):
Tim Petersf9347782003-03-07 15:36:41 +00001758 return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001759 if sys.platform == "win32" and tarinfo.isdir():
1760 # According to msdn.microsoft.com, it is an error (EACCES)
1761 # to use utime() on directories.
1762 return
1763 try:
1764 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1765 except EnvironmentError, e:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001766 raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001767
1768 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       A member stored in self.firstmember is handed out first (and the
       attribute is cleared). Otherwise header blocks are read starting
       at self.offset. A block that cannot be turned into a member
       (ValueError) is skipped when self.ignore_zeros is set; without it
       the method returns None, or raises ReadError if self.offset is
       still 0 at that point. Every member returned from the read loop
       is appended to self.members.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None

        try:
            header = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of
            # the TarFile and set self.offset to the position where the
            # data blocks should begin.
            header.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(header)
        except ValueError:
            # sys.exc_info() instead of "except ..., e" keeps this clause
            # valid across Python versions.
            err = sys.exc_info()[1]
            if not self.ignore_zeros:
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % err)
                return None
            self._dbg(2, "0x%X: empty or invalid block: %s" %
                      (self.offset, err))
            self.offset += BLOCKSIZE
        else:
            break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have exactly one '/' at the end. The name
    # may already carry one (that is precisely what the check above
    # looks for), so only append when it is missing; otherwise the
    # member would be stored as "dir//".
    if tarinfo.isdir() and not tarinfo.name.endswith("/"):
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1822
1823 #--------------------------------------------------------------------------
Thomas Wouters477c8d52006-05-27 19:21:47 +00001824 # The following are methods that are called depending on the type of a
1825 # member. The entry point is proc_member() which is called with a TarInfo
1826 # object created from the header block from the current offset. The
1827 # proc_member() method can be overridden in a subclass to add custom
1828 # proc_*() methods. A proc_*() method MUST implement the following
1829 # operations:
1830 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1831 # if there is data that follows.
1832 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001833 # begin.
Thomas Wouters477c8d52006-05-27 19:21:47 +00001834 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method matching its type.

       GNU longname/longlink headers go to proc_gnulong(), GNU sparse
       headers to proc_sparse(), everything else to proc_builtin().
       The result of the chosen method is returned unchanged.
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    if kind == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    return self.proc_builtin(tarinfo)
1845
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type
       (which is handled like a regular file).

       Records the current archive offset as tarinfo.offset_data and,
       for regular files and unsupported types, advances self.offset
       past the member's data blocks. Returns tarinfo.
    """
    tarinfo.offset_data = self.offset

    carries_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if carries_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001855
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname
       or longlink member.

       tarinfo is the longname/longlink header. Its data blocks are read
       from self.fileobj and joined; the header block that follows is
       then parsed and passed to proc_member(). The resulting TarInfo
       gets tarinfo's offset and, depending on tarinfo.type, the joined
       text (trailing NULs stripped) as its name or linkname, and is
       returned. self.offset is advanced for every block read here.
    """
    # Collect the blocks in a list and join once instead of growing a
    # string with += on every iteration.
    blocks = []
    remaining = tarinfo.size
    while remaining > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longtext = "".join(blocks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longtext.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longtext.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001884
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a _ringbuffer of _data and _hole sections from the sparse
       structs in tarinfo.buf and in any extended header blocks that
       follow, stores it as tarinfo.sparse, sets tarinfo.offset_data,
       advances self.offset past the stored data and finally replaces
       tarinfo.size with the original size read from the header.
       Returns tarinfo.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()

    # There are 4 possible sparse structs in the
    # first header, starting at byte 386.
    lastpos, realpos = self._proc_sparse_structs(buf, 386, 4, sp, 0, 0)

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process, each with up to 21 structs.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        lastpos, realpos = self._proc_sparse_structs(buf, 0, 21, sp,
                                                     lastpos, realpos)
        isextended = ord(buf[504])

    # A trailing hole covers the distance up to the original size.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo

def _proc_sparse_structs(self, buf, pos, count, sp, lastpos, realpos):
    """Append sections for up to count 24-byte sparse structs to sp.

       Each struct is a 12-byte offset field followed by a 12-byte
       numbytes field, read from buf starting at index pos. Parsing
       stops early at the first struct whose fields make nti() raise
       ValueError. Returns the updated (lastpos, realpos) pair.
    """
    for _ in range(count):
        try:
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            break
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24
    return lastpos, realpos
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001941
1942 #--------------------------------------------------------------------------
1943 # Little helper methods:
1944
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    # -count % BLOCKSIZE is the distance from count to the next block
    # boundary (0 when count is already aligned).
    padding = -count % BLOCKSIZE
    return count + padding
1953
1954 def _getmember(self, name, tarinfo=None):
1955 """Find an archive member by name from bottom to top.
1956 If tarinfo is given, it is used as the starting point.
1957 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001958 # Ensure that all members have been loaded.
1959 members = self.getmembers()
1960
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001961 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001962 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001963 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001964 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001965
1966 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001967 if name == members[i].name:
1968 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001969
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001970 def _load(self):
1971 """Read through the entire archive file and look for readable
1972 members.
1973 """
1974 while True:
1975 tarinfo = self.next()
1976 if tarinfo is None:
1977 break
1978 self._loaded = True
1979
1980 def _check(self, mode=None):
1981 """Check if TarFile is still open, and if the operation's mode
1982 corresponds to TarFile's mode.
1983 """
1984 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001985 raise IOError("%s is closed" % self.__class__.__name__)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001986 if mode is not None and self._mode not in mode:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001987 raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001988
1989 def __iter__(self):
1990 """Provide an iterator object.
1991 """
1992 if self._loaded:
1993 return iter(self.members)
1994 else:
1995 return TarIter(self)
1996
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level exceeds self.debug.
    """
    if not level <= self.debug:
        return
    print >> sys.stderr, msg
2002# class TarFile
2003
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile to iterate and start at index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, or raise StopIteration.

           While the TarFile is not fully loaded, members come from its
           next() method; when that reports the end, the TarFile is
           flagged as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Once the
        # archive is loaded, continue from the member list by index.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2039
2040# Helper classes for sparse file support
2041class _section:
2042 """Base class for _data and _hole.
2043 """
2044 def __init__(self, offset, size):
2045 self.offset = offset
2046 self.size = size
2047 def __contains__(self, offset):
2048 return self.offset <= offset < self.offset + self.size
2049
class _data(_section):
    """A data section of a sparse file.

       In addition to the offset and size kept by _section, the
       realpos value supplied by the caller is stored on the instance.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2056
class _hole(_section):
    """A hole section of a sparse file; adds nothing to _section.
    """
2061
2062class _ringbuffer(list):
2063 """Ringbuffer class which increases performance
2064 over a regular list.
2065 """
2066 def __init__(self):
2067 self.idx = 0
2068 def find(self, offset):
2069 idx = self.idx
2070 while True:
2071 item = self[idx]
2072 if offset in item:
2073 break
2074 idx += 1
2075 if idx == len(self):
2076 idx = 0
2077 if idx == self.idx:
2078 # End of File
2079 return None
2080 self.idx = idx
2081 return item
2082
2083#---------------------------------------------
2084# zipfile compatible TarFile class
2085#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file via TarFile.taropen (TAR_PLAIN) or TarFile.gzopen
           (TAR_GZIPPED); any other compression raises ValueError.
           In read mode every member gets the zipfile-style attributes
           filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete data of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive; compress_type is ignored."""
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes as a member described by zinfo.

           zinfo's zipfile-style attributes (filename, file_size,
           date_time) are copied to name, size and mtime first.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2132#class TarFileCompat
2133
2134#--------------------
2135# exported functions
2136#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) with this
       module's open() without a TarError, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2147
# Module-level alias for TarFile.open. From this point on the name open
# inside this module refers to TarFile.open, not to the builtin.
open = TarFile.open