#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""
32
# Module metadata.  The "$...$" values look like version-control keyword
# placeholders (presumably expanded on checkout -- not verifiable from here).
__version__ = "$Revision$"
# $Source$

version     = "0.8.0"
__author__  = "Lars Gustäbel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
#---------
# Imports
#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError("tarfile does not work for platform==mac")

# grp and pwd are optional; when either is unavailable both names are
# bound to None.
try:
    import grp
    import pwd
except ImportError:
    grp = pwd = None
65
# Names exported by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
]
68
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1    # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)
108
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# The values are spelled as octal digit strings so that they read the
# same as in the tar specification.
S_IFLNK = int("0120000", 8)     # symbolic link
S_IFREG = int("0100000", 8)     # regular file
S_IFBLK = int("0060000", 8)     # block device
S_IFDIR = int("0040000", 8)     # directory
S_IFCHR = int("0020000", 8)     # character device
S_IFIFO = int("0010000", 8)     # fifo

TSUID = int("04000", 8)         # set UID on execution
TSGID = int("02000", 8)         # set GID on execution
TSVTX = int("01000", 8)         # reserved

TUREAD = int("0400", 8)         # read by owner
TUWRITE = int("0200", 8)        # write by owner
TUEXEC = int("0100", 8)         # execute/search by owner
TGREAD = int("0040", 8)         # read by group
TGWRITE = int("0020", 8)        # write by group
TGEXEC = int("0010", 8)         # execute/search by group
TOREAD = int("0004", 8)         # read by other
TOWRITE = int("0002", 8)        # write by other
TOEXEC = int("0001", 8)         # execute/search by other
132
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Return s as a fixed-width header field of exactly length characters.

       Shorter strings are padded on the right with NUL characters;
       longer ones are cut off after length characters (and are then
       not NUL-terminated).
    """
    # A non-positive repeat count yields "", so no padding is added
    # when s already fills (or overflows) the field.
    padding = NUL * (length - len(s))
    return s[:length] + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nti(s):
    """Convert a number field of a tar header to a python number.

       Two encodings are understood (see itn()):
         - a string of octal digits, terminated/padded with NULs or
           spaces; a field holding only padding decodes to 0,
         - a leading 0200 byte followed by a big-endian base-256
           representation of the number.

       An empty field is treated like a field of pure padding and
       decodes to 0.  Raises HeaderError if the octal digits cannot be
       parsed.
    """
    # Compare a one-character slice rather than s[0] so that an empty
    # field does not blow up with an IndexError.
    if s[:1] == chr(0x80):
        # Base-256 encoding: fold the remaining bytes, most significant
        # byte first.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    try:
        return int(s.rstrip(NUL + " ") or "0", 8)
    except ValueError:
        raise HeaderError("invalid header")
158
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field of width digits.

       POSIX 1003.1-1988 requires numbers to be encoded as a string of
       octal digits followed by a null-byte, which allows values up to
       (8**(digits-1))-1.  GNU tar allows storing larger numbers: a
       leading 0200 byte marks that encoding and the following
       digits-1 bytes are a big-endian representation, allowing values
       up to (256**(digits-1))-1.

       With posix=True a value that does not fit the octal form raises
       ValueError instead of falling back to the GNU encoding.
    """
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Emit the low byte first and keep prepending, which leaves the
    # most significant byte in front.
    payload = []
    for _ in range(width):
        payload.insert(0, chr(n & 0xFF))
        n >>= 8
    return chr(0x80) + "".join(payload)
185
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of the header block buf.

       All characters of the 512 byte header are summed up, except for
       the chksum field (bytes 148-155) which is treated as if it was
       filled with spaces.  According to the GNU tar sources, some tars
       (Sun and NeXT) calculate chksum with signed char, which gives a
       different result if there are chars in the buffer with the high
       bit set.  So both variants are calculated.
    """
    header = buf[:512]
    # The eight blanked-out chksum bytes count as spaces: 8 * 32 == 256.
    # "8x" makes struct skip exactly those bytes.
    blanks = 256
    unsigned_chksum = blanks + sum(struct.unpack("148B8x356B", header))
    signed_chksum = blanks + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000198
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError("end of file reached") if src runs out of data
       before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes, or fail if src is exhausted.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_blocks, tail = divmod(length, BUFSIZE)
    done = 0
    while done < full_blocks:
        transfer(BUFSIZE)
        done += 1

    if tail:
        transfer(tail)
    return
223
# One entry per character of the mode string built by filemode().  Each
# entry lists (bits, character) candidates in priority order; the first
# candidate whose bits are all set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000250
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" stays in place when none of the candidates' bits are set.
        symbol = "-"
        for mask, letter in candidates:
            if (mode & mask) == mask:
                symbol = letter
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
# normpath() always yields "/"-separated paths, whatever the local
# separator is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert local separators to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
270
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000289
#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.

       The file is accessed through an os-level file descriptor
       (os.open/os.read/os.write/os.close).
    """

    def __init__(self, name, mode):
        """Open the file name.  mode is "r" (read) or "w" (create or
           truncate, then write); any other value raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is only defined on platforms that distinguish text
        # from binary files.
        flags |= getattr(os, "O_BINARY", 0)
        # Pass the permission bits explicitly (rw-rw-rw-, still subject
        # to the umask).  os.open() defaults to 0777, which would mark
        # every newly created archive as executable.
        perm = (stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IWGRP |
                stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perm)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Read and return up to size bytes from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string s to the file descriptor."""
        os.write(self.fd, s)
315 os.write(self.fd, s)
316
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened with _LowLevelFile if fileobj
                       is None, and stored in the gzip header on write.
           mode     -- "r" or "w".
           comptype -- "tar" (no compression), "gz", "bz2" or "*"
                       (detect the compression from the data).
           fileobj  -- object to read from / write to, or None.
           bufsize  -- block size used to access fileobj.

           Raises CompressionError if the required compression module
           is not available.
        """
        # Only a file that was opened here is closed by close().
        self._extfileobj = fileobj is not None
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)

        try:
            if comptype == '*':
                # Enable transparent compression detection for the
                # stream interface
                fileobj = _StreamProxy(fileobj)
                comptype = fileobj.getcomptype()

            self.name     = name or ""
            self.mode     = mode
            self.comptype = comptype
            self.fileobj  = fileobj
            self.bufsize  = bufsize
            self.buf      = ""
            self.pos      = 0
            self.closed   = False

            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("")
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except:
            # Construction failed: do not leak a file that was opened
            # here, and mark the object closed so that __del__() does
            # not run close() on a half-initialized stream.  The
            # exception itself is passed on unchanged.
            if not self._extfileobj:
                fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__() failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data without a zlib header, the
        # gzip header is written by hand below.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Append string s to the internal buffer and write it to
           the file object in blocks of bufsize, for as long as the
           buffer holds more than bufsize bytes.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long.  So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Parses and skips the gzip header.  Raises ReadError if the
           magic number is wrong and CompressionError if the
           compression method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: two length bytes (little-endian) and the
            # payload.  It is part of the uncompressed gzip header, so
            # it has to be skipped with the raw __read(); read() would
            # feed it to the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Returns the new position; raises StreamError when pos lies
           before the current position.
        """
        distance = pos - self.pos
        if distance < 0:
            raise StreamError("seeking backwards is not allowed")

        blocks, remainder = divmod(distance, self.bufsize)
        skipped = 0
        while skipped < blocks:
            self.read(self.bufsize)
            skipped += 1
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream, decompressing them if
           the stream is compressed.  Fewer bytes are returned when
           the underlying data is exhausted.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # dbuf holds decompressed data left over from the last call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
540
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block, which is kept in
           self.buf for inspection by getcomptype().
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead, regardless of size.
           Every later call goes directly to the wrapped file object.
        """
        # Rebinding the name on the instance takes this method out of
        # the way after its first use.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar", judging by the first bytes of
           the stream.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", a digit for the block size
        # (compression level 1-9) and the block magic "1AY&SY".  The
        # digit is skipped so that all levels are recognized, not only
        # level 9.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
564
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched from fileobj per read.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing."""
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)initialize the proxy at uncompressed position 0.  In
           read mode this rewinds fileobj and starts over with a fresh
           decompressor.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of decompressed data; fewer if the
           compressed data ends first.
        """
        b = [self.buf]
        x = len(self.buf)
        while x < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # fileobj is exhausted (e.g. a truncated archive).
                # Without this check the loop would spin forever,
                # because decompressing "" never yields any data.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # Data found after the end of the bz2 stream.
                break
            b.append(data)
            x += len(data)
        self.buf = "".join(b)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to uncompressed position pos.  Going backwards means
           decompressing again from the very beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current uncompressed position."""
        return self.pos

    def write(self, data):
        """Compress data and write the result to fileobj."""
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush pending compressed data (write mode only) and close
           fileobj.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
627
#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj is the enclosing file, offset the position in it
           where the part starts and size the length of the part.
           sparse, if given, is an object whose find(position) method
           returns the section covering that position, or None.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.  At most the data remaining up to
           the end of the part is returned; without size, all of it.
        """
        available = self.size - self.position
        if size is None or size > available:
            size = available

        if self.sparse is not None:
            return self.readsparse(size)
        return self.readnormal(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        start = self.offset + self.position
        self.fileobj.seek(start)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        pieces = []
        remaining = size
        while remaining > 0:
            piece = self.readsparsesection(remaining)
            if not piece:
                break
            pieces.append(piece)
            remaining -= len(piece)
        return "".join(pieces)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return ""

        # Never read across the end of the current section.
        section_end = section.offset + section.size
        size = min(size, section_end - self.position)

        if not isinstance(section, _data):
            # Not a data section: it has no bytes in the archive and
            # reads as NULs.
            self.position += size
            return NUL * size

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += size
        return self.fileobj.read(size)
#class _FileInFile
705
706
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() fetches at a time while it looks for
    # the next newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a read-only view on the data of the member tarinfo
           inside the archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data already fetched from fileobj by readline() but not yet
        # handed out.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Buffered data is served first, the rest comes from fileobj.
        if size is None:
            head, self.buffer = self.buffer, ""
            data = head + self.fileobj.read()
        else:
            head, self.buffer = self.buffer[:size], self.buffer[size:]
            data = head + self.fileobj.read(size - len(head))

        self.position += len(data)
        return data

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        newline_at = self.buffer.find("\n")
        if newline_at < 0:
            # Keep fetching blocks until one of them holds a newline
            # or the data runs out.
            pieces = [self.buffer]
            chunk = self.fileobj.read(self.blocksize)
            pieces.append(chunk)
            while chunk and "\n" not in chunk:
                chunk = self.fileobj.read(self.blocksize)
                pieces.append(chunk)
            self.buffer = "".join(pieces)
            newline_at = self.buffer.find("\n")

        if newline_at < 0:
            # no newline found.
            end = len(self.buffer)
        else:
            end = newline_at + 1

        if size != -1:
            end = min(size, end)

        line = self.buffer[:end]
        self.buffer = self.buffer[end:]
        self.position += len(line)
        return line

    def readlines(self):
        """Return a list with all remaining lines.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.  The resulting position is
           kept within the range 0..size.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            target = self.position + pos
            if pos < 0:
                target = max(target, 0)
            else:
                target = min(target, self.size)
        elif whence == os.SEEK_END:
            target = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.position = target
        # Buffered data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        line = self.readline()
        while line:
            yield line
            line = self.readline()
#class ExFileObject
834
#------------------
# Exported Classes
#------------------
838class TarInfo(object):
839 """Informational class which holds the details about an
840 archive member given by a tar header block.
841 TarInfo objects are returned by TarFile.getmember(),
842 TarFile.getmembers() and TarFile.gettarinfo() and are
843 usually created internally.
844 """
845
846 def __init__(self, name=""):
847 """Construct a TarInfo object. name is the optional name
848 of the member.
849 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000850 self.name = name # member name (dirnames must end with '/')
851 self.mode = 0666 # file permissions
852 self.uid = 0 # user id
853 self.gid = 0 # group id
854 self.size = 0 # file size
855 self.mtime = 0 # modification time
856 self.chksum = 0 # header checksum
857 self.type = REGTYPE # member type
858 self.linkname = "" # link name
859 self.uname = "user" # user name
860 self.gname = "group" # group name
861 self.devmajor = 0 # device major number
862 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000863
Georg Brandl38c6a222006-05-10 16:26:03 +0000864 self.offset = 0 # the tar header starts here
865 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000866
867 def __repr__(self):
868 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
869
Guido van Rossum75b64e62005-01-16 00:16:11 +0000870 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000871 def frombuf(cls, buf):
872 """Construct a TarInfo object from a 512 byte string buffer.
873 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000874 if len(buf) != BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +0000875 raise HeaderError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000876 if buf.count(NUL) == BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +0000877 raise HeaderError("empty header")
878
Georg Brandlded1c4d2006-12-20 11:55:16 +0000879 chksum = nti(buf[148:156])
Georg Brandlebbeed72006-12-19 22:06:46 +0000880 if chksum not in calc_chksums(buf):
881 raise HeaderError("bad checksum")
Georg Brandl38c6a222006-05-10 16:26:03 +0000882
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000883 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000884 tarinfo.buf = buf
Georg Brandle8953182006-05-27 14:02:03 +0000885 tarinfo.name = buf[0:100].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000886 tarinfo.mode = nti(buf[100:108])
887 tarinfo.uid = nti(buf[108:116])
888 tarinfo.gid = nti(buf[116:124])
889 tarinfo.size = nti(buf[124:136])
890 tarinfo.mtime = nti(buf[136:148])
Georg Brandlebbeed72006-12-19 22:06:46 +0000891 tarinfo.chksum = chksum
Georg Brandl38c6a222006-05-10 16:26:03 +0000892 tarinfo.type = buf[156:157]
Georg Brandle8953182006-05-27 14:02:03 +0000893 tarinfo.linkname = buf[157:257].rstrip(NUL)
894 tarinfo.uname = buf[265:297].rstrip(NUL)
895 tarinfo.gname = buf[297:329].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000896 tarinfo.devmajor = nti(buf[329:337])
897 tarinfo.devminor = nti(buf[337:345])
Georg Brandl3354f282006-10-29 09:16:12 +0000898 prefix = buf[345:500].rstrip(NUL)
899
900 if prefix and not tarinfo.issparse():
901 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000902
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000903 return tarinfo
904
Georg Brandl38c6a222006-05-10 16:26:03 +0000905 def tobuf(self, posix=False):
Georg Brandl3354f282006-10-29 09:16:12 +0000906 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000907 """
Georg Brandl3354f282006-10-29 09:16:12 +0000908 buf = ""
909 type = self.type
910 prefix = ""
911
912 if self.name.endswith("/"):
913 type = DIRTYPE
914
Georg Brandl87fa5592006-12-06 22:21:18 +0000915 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
916 # Prevent "././@LongLink" from being normalized.
917 name = self.name
918 else:
919 name = normpath(self.name)
Georg Brandl3354f282006-10-29 09:16:12 +0000920
921 if type == DIRTYPE:
922 # directories should end with '/'
923 name += "/"
924
925 linkname = self.linkname
926 if linkname:
927 # if linkname is empty we end up with a '.'
928 linkname = normpath(linkname)
929
930 if posix:
931 if self.size > MAXSIZE_MEMBER:
932 raise ValueError("file is too large (>= 8 GB)")
933
934 if len(self.linkname) > LENGTH_LINK:
935 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
936
937 if len(name) > LENGTH_NAME:
938 prefix = name[:LENGTH_PREFIX + 1]
939 while prefix and prefix[-1] != "/":
940 prefix = prefix[:-1]
941
942 name = name[len(prefix):]
943 prefix = prefix[:-1]
944
945 if not prefix or len(name) > LENGTH_NAME:
946 raise ValueError("name is too long")
947
948 else:
949 if len(self.linkname) > LENGTH_LINK:
950 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
951
952 if len(name) > LENGTH_NAME:
953 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
954
Georg Brandl38c6a222006-05-10 16:26:03 +0000955 parts = [
Georg Brandl3354f282006-10-29 09:16:12 +0000956 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000957 itn(self.mode & 07777, 8, posix),
958 itn(self.uid, 8, posix),
959 itn(self.gid, 8, posix),
960 itn(self.size, 12, posix),
961 itn(self.mtime, 12, posix),
962 " ", # checksum field
Georg Brandl3354f282006-10-29 09:16:12 +0000963 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000964 stn(self.linkname, 100),
965 stn(MAGIC, 6),
966 stn(VERSION, 2),
967 stn(self.uname, 32),
968 stn(self.gname, 32),
969 itn(self.devmajor, 8, posix),
970 itn(self.devminor, 8, posix),
Georg Brandl3354f282006-10-29 09:16:12 +0000971 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000972 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000973
Georg Brandl3354f282006-10-29 09:16:12 +0000974 buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
Georg Brandl87fa5592006-12-06 22:21:18 +0000975 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Georg Brandl3354f282006-10-29 09:16:12 +0000976 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000977 self.buf = buf
978 return buf
979
def _create_gnulong(self, name, type):
    """Build the GNU longname/longlink blocks for `name'.

       The result is an extended tar header of the given `type' whose
       size field holds the length of the NUL-terminated name, followed
       by the name itself, NUL-padded to a multiple of BLOCKSIZE.
    """
    payload = name + NUL

    header = self.__class__()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # Extended header first, then the data blocks carrying the name.
    pieces = [header.tobuf(), payload]
    leftover = len(payload) % BLOCKSIZE
    if leftover > 0:
        pieces.append((BLOCKSIZE - leftover) * NUL)
    return "".join(pieces)
1002
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    kind = self.type
    return kind in REGULAR_TYPES
def isfile(self):
    """Alias for isreg()."""
    answer = self.isreg()
    return answer
def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    kind = self.type
    return kind == DIRTYPE
def issym(self):
    """Return True if the member's type is SYMTYPE."""
    kind = self.type
    return kind == SYMTYPE
def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    kind = self.type
    return kind == LNKTYPE
def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    kind = self.type
    return kind == CHRTYPE
def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    kind = self.type
    return kind == BLKTYPE
def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    kind = self.type
    return kind == FIFOTYPE
def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE."""
    kind = self.type
    return kind == GNUTYPE_SPARSE
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1023# class TarInfo
1024
1025class TarFile(object):
1026 """The TarFile Class provides an interface to tar archives.
1027 """
1028
debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 0              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.
                            # extract() re-raises EnvironmentError when
                            # errorlevel > 0 and ExtractError only when
                            # errorlevel > 1.

posix = False               # If True, generates POSIX.1-1990-compliant
                            # archives (no GNU extensions!)
                            # addfile() hands this flag to tobuf().

fileobject = ExFileObject   # Class that extractfile() instantiates as
                            # fileobject(tarfile, tarinfo).
1045
1046 def __init__(self, name=None, mode="r", fileobj=None):
1047 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1048 read from an existing archive, 'a' to append data to an existing
1049 file or 'w' to create a new file overwriting an existing one. `mode'
1050 defaults to 'r'.
1051 If `fileobj' is given, it is used for reading or writing data. If it
1052 can be determined, `mode' is overridden by `fileobj's mode.
1053 `fileobj' is not closed, when TarFile is closed.
1054 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001055 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001056
1057 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001058 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001059 self._mode = mode
1060 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1061
1062 if not fileobj:
1063 fileobj = file(self.name, self.mode)
1064 self._extfileobj = False
1065 else:
1066 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001067 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001068 if hasattr(fileobj, "mode"):
1069 self.mode = fileobj.mode
1070 self._extfileobj = True
1071 self.fileobj = fileobj
1072
1073 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +00001074 self.closed = False
1075 self.members = [] # list of members as TarInfo objects
1076 self._loaded = False # flag if all members have been read
1077 self.offset = 0L # current position in the archive file
1078 self.inodes = {} # dictionary caching the inodes of
1079 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001080
1081 if self._mode == "r":
1082 self.firstmember = None
1083 self.firstmember = self.next()
1084
1085 if self._mode == "a":
1086 # Move to the end of the archive,
1087 # before the first empty block.
1088 self.firstmember = None
1089 while True:
1090 try:
1091 tarinfo = self.next()
1092 except ReadError:
1093 self.fileobj.seek(0)
1094 break
1095 if tarinfo is None:
1096 self.fileobj.seek(- BLOCKSIZE, 1)
1097 break
1098
1099 if self._mode in "aw":
1100 self._loaded = True
1101
1102 #--------------------------------------------------------------------------
1103 # Below are the classmethods which act as alternate constructors to the
1104 # TarFile class. The open() method is the only one that is needed for
1105 # public use; it is the "super"-constructor and is able to select an
1106 # adequate "sub"-constructor for a particular compression using the mapping
1107 # from OPEN_METH.
1108 #
1109 # This concept allows one to subclass TarFile without losing the comfort of
1110 # the super-constructor. A sub-constructor is registered and made available
1111 # by adding it to the mapping in OPEN_METH.
1112
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       ValueError is raised if neither `name' nor `fileobj' is given or
       if `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered method is able
       to read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # A failed attempt may already have consumed data from fileobj,
        # so remember where we started and go back there before the
        # next method is tried.
        saved_pos = None
        if fileobj is not None:
            try:
                saved_pos = fileobj.tell()
            except (AttributeError, IOError):
                saved_pos = None

        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if saved_pos is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Reject anything but a single 'r' or 'w' before the stream
        # object is created.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001180
Guido van Rossum75b64e62005-01-16 00:16:11 +00001181 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001182 def taropen(cls, name, mode="r", fileobj=None):
1183 """Open uncompressed tar archive name for reading or writing.
1184 """
1185 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001186 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001187 return cls(name, mode, fileobj)
1188
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. ValueError is raised
       for an invalid `mode' or if there is nothing to open,
       CompressionError if the gzip module is not usable and ReadError
       if an IOError occurs while the archive is being opened.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    if name is None:
        # Only a file object was passed in; there is no file name to
        # derive the archive name from.
        if fileobj is None:
            raise ValueError("nothing to open")
        tarname = None
    else:
        # Derive the name of the uncompressed archive:
        # x.tgz -> x.tar, x.tar.gz -> x.tar
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj))
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Whatever went wrong, do not leak the file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1225
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. ValueError is raised
       for an invalid `mode' or if there is nothing to open,
       CompressionError if the bz2 module is not available and ReadError
       if an IOError occurs while the archive is being opened.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if name is None:
        # Only a file object was passed in; there is no file name to
        # derive the archive name from.
        if fileobj is None:
            raise ValueError("nothing to open")
        tarname = None
    else:
        # Derive the name of the uncompressed archive:
        # x.tbz2 -> x.tar, x.tar.bz2 -> x.tar
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Whatever went wrong, do not leak the file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1258
# All *open() methods are registered here.
# Maps a compression type, as it appears after the ':' in open()'s mode,
# to the name of the classmethod that handles it. open() looks the
# method up with getattr(), so subclasses may override or extend both.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1265
1266 #--------------------------------------------------------------------------
1267 # The public methods which TarFile provides:
1268
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. A file
       object that was opened by the TarFile itself is closed even if
       writing the final blocks fails.
    """
    if self.closed:
        return

    try:
        if self._mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release our own file handle no matter what happened above.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1288
1289 def getmember(self, name):
1290 """Return a TarInfo object for member `name'. If `name' can not be
1291 found in the archive, KeyError is raised. If a member occurs more
1292 than once in the archive, its last occurence is assumed to be the
1293 most up-to-date version.
1294 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001295 tarinfo = self._getmember(name)
1296 if tarinfo is None:
Georg Brandle4751e32006-05-18 06:11:19 +00001297 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001298 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001299
1300 def getmembers(self):
1301 """Return the members of the archive as a list of TarInfo objects. The
1302 list has the same order as the members in the archive.
1303 """
1304 self._check()
1305 if not self._loaded: # if we want to obtain a list of
1306 self._load() # all members, we first have to
1307 # scan the whole archive.
1308 return self.members
1309
1310 def getnames(self):
1311 """Return the members of the archive as a list of their names. It has
1312 the same order as the list returned by getmembers().
1313 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001314 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001315
1316 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1317 """Create a TarInfo object for either the file `name' or the file
1318 object `fileobj' (using os.fstat on its file descriptor). You can
1319 modify some of the TarInfo's attributes before you add it using
1320 addfile(). If given, `arcname' specifies an alternative name for the
1321 file in the archive.
1322 """
1323 self._check("aw")
1324
1325 # When fileobj is given, replace name by
1326 # fileobj's real name.
1327 if fileobj is not None:
1328 name = fileobj.name
1329
1330 # Building the name of the member in the archive.
1331 # Backward slashes are converted to forward slashes,
1332 # Absolute paths are turned to relative paths.
1333 if arcname is None:
1334 arcname = name
1335 arcname = normpath(arcname)
1336 drv, arcname = os.path.splitdrive(arcname)
1337 while arcname[0:1] == "/":
1338 arcname = arcname[1:]
1339
1340 # Now, fill the TarInfo object with
1341 # information specific for the file.
1342 tarinfo = TarInfo()
1343
1344 # Use os.stat or os.lstat, depending on platform
1345 # and if symlinks shall be resolved.
1346 if fileobj is None:
1347 if hasattr(os, "lstat") and not self.dereference:
1348 statres = os.lstat(name)
1349 else:
1350 statres = os.stat(name)
1351 else:
1352 statres = os.fstat(fileobj.fileno())
1353 linkname = ""
1354
1355 stmd = statres.st_mode
1356 if stat.S_ISREG(stmd):
1357 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001358 if not self.dereference and \
1359 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001360 # Is it a hardlink to an already
1361 # archived file?
1362 type = LNKTYPE
1363 linkname = self.inodes[inode]
1364 else:
1365 # The inode is added only if its valid.
1366 # For win32 it is always 0.
1367 type = REGTYPE
1368 if inode[0]:
1369 self.inodes[inode] = arcname
1370 elif stat.S_ISDIR(stmd):
1371 type = DIRTYPE
1372 if arcname[-1:] != "/":
1373 arcname += "/"
1374 elif stat.S_ISFIFO(stmd):
1375 type = FIFOTYPE
1376 elif stat.S_ISLNK(stmd):
1377 type = SYMTYPE
1378 linkname = os.readlink(name)
1379 elif stat.S_ISCHR(stmd):
1380 type = CHRTYPE
1381 elif stat.S_ISBLK(stmd):
1382 type = BLKTYPE
1383 else:
1384 return None
1385
1386 # Fill the TarInfo object with all
1387 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001388 tarinfo.name = arcname
1389 tarinfo.mode = stmd
1390 tarinfo.uid = statres.st_uid
1391 tarinfo.gid = statres.st_gid
1392 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001393 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001394 else:
1395 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001396 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001397 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001398 tarinfo.linkname = linkname
1399 if pwd:
1400 try:
1401 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1402 except KeyError:
1403 pass
1404 if grp:
1405 try:
1406 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1407 except KeyError:
1408 pass
1409
1410 if type in (CHRTYPE, BLKTYPE):
1411 if hasattr(os, "major") and hasattr(os, "minor"):
1412 tarinfo.devmajor = os.major(statres.st_rdev)
1413 tarinfo.devminor = os.minor(statres.st_rdev)
1414 return tarinfo
1415
1416 def list(self, verbose=True):
1417 """Print a table of contents to sys.stdout. If `verbose' is False, only
1418 the names of the members are printed. If it is True, an `ls -l'-like
1419 output is produced.
1420 """
1421 self._check()
1422
1423 for tarinfo in self:
1424 if verbose:
1425 print filemode(tarinfo.mode),
1426 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1427 tarinfo.gname or tarinfo.gid),
1428 if tarinfo.ischr() or tarinfo.isblk():
1429 print "%10s" % ("%d,%d" \
1430 % (tarinfo.devmajor, tarinfo.devminor)),
1431 else:
1432 print "%10d" % tarinfo.size,
1433 print "%d-%02d-%02d %02d:%02d:%02d" \
1434 % time.localtime(tarinfo.mtime)[:6],
1435
1436 print tarinfo.name,
1437
1438 if verbose:
1439 if tarinfo.issym():
1440 print "->", tarinfo.linkname,
1441 if tarinfo.islnk():
1442 print "link to", tarinfo.linkname,
1443 print
1444
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of an unsupported type are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing to the archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1496
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy; the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last
    # block with NULs up to a full BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        padding = -member.size % BLOCKSIZE
        if padding > 0:
            self.fileobj.write(NUL * padding)
        self.offset += member.size + padding

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001522
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001523 def extractall(self, path=".", members=None):
1524 """Extract all members from the archive to the current working
1525 directory and set owner, modification time and permissions on
1526 directories afterwards. `path' specifies a different directory
1527 to extract to. `members' is optional and must be a subset of the
1528 list returned by getmembers().
1529 """
1530 directories = []
1531
1532 if members is None:
1533 members = self
1534
1535 for tarinfo in members:
1536 if tarinfo.isdir():
1537 # Extract directory with a safe mode, so that
1538 # all files below can be extracted as well.
1539 try:
1540 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1541 except EnvironmentError:
1542 pass
1543 directories.append(tarinfo)
1544 else:
1545 self.extract(tarinfo, path)
1546
1547 # Reverse sort directories.
1548 directories.sort(lambda a, b: cmp(a.name, b.name))
1549 directories.reverse()
1550
1551 # Set correct owner, mtime and filemode on directories.
1552 for tarinfo in directories:
1553 path = os.path.join(path, tarinfo.name)
1554 try:
1555 self.chown(tarinfo, path)
1556 self.utime(tarinfo, path)
1557 self.chmod(tarinfo, path)
1558 except ExtractError, e:
1559 if self.errorlevel > 1:
1560 raise
1561 else:
1562 self._dbg(1, "tarfile: %s" % e)
1563
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001564 def extract(self, member, path=""):
1565 """Extract a member from the archive to the current working directory,
1566 using its full name. Its file information is extracted as accurately
1567 as possible. `member' may be a filename or a TarInfo object. You can
1568 specify a different directory using `path'.
1569 """
1570 self._check("r")
1571
1572 if isinstance(member, TarInfo):
1573 tarinfo = member
1574 else:
1575 tarinfo = self.getmember(member)
1576
Neal Norwitza4f651a2004-07-20 22:07:44 +00001577 # Prepare the link target for makelink().
1578 if tarinfo.islnk():
1579 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1580
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001581 try:
1582 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1583 except EnvironmentError, e:
1584 if self.errorlevel > 0:
1585 raise
1586 else:
1587 if e.filename is None:
1588 self._dbg(1, "tarfile: %s" % e.strerror)
1589 else:
1590 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1591 except ExtractError, e:
1592 if self.errorlevel > 1:
1593 raise
1594 else:
1595 self._dbg(1, "tarfile: %s" % e)
1596
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of an unknown type (which are treated
    # like regular files), are wrapped directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
1635
1636 def _extract_member(self, tarinfo, targetpath):
1637 """Extract the TarInfo object tarinfo to a physical
1638 file called targetpath.
1639 """
1640 # Fetch the TarInfo object for the given name
1641 # and build the destination pathname, replacing
1642 # forward slashes to platform specific separators.
1643 if targetpath[-1:] == "/":
1644 targetpath = targetpath[:-1]
1645 targetpath = os.path.normpath(targetpath)
1646
1647 # Create all upper directories.
1648 upperdirs = os.path.dirname(targetpath)
1649 if upperdirs and not os.path.exists(upperdirs):
1650 ti = TarInfo()
1651 ti.name = upperdirs
1652 ti.type = DIRTYPE
1653 ti.mode = 0777
1654 ti.mtime = tarinfo.mtime
1655 ti.uid = tarinfo.uid
1656 ti.gid = tarinfo.gid
1657 ti.uname = tarinfo.uname
1658 ti.gname = tarinfo.gname
1659 try:
1660 self._extract_member(ti, ti.name)
1661 except:
1662 pass
1663
1664 if tarinfo.islnk() or tarinfo.issym():
1665 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1666 else:
1667 self._dbg(1, tarinfo.name)
1668
1669 if tarinfo.isreg():
1670 self.makefile(tarinfo, targetpath)
1671 elif tarinfo.isdir():
1672 self.makedir(tarinfo, targetpath)
1673 elif tarinfo.isfifo():
1674 self.makefifo(tarinfo, targetpath)
1675 elif tarinfo.ischr() or tarinfo.isblk():
1676 self.makedev(tarinfo, targetpath)
1677 elif tarinfo.islnk() or tarinfo.issym():
1678 self.makelink(tarinfo, targetpath)
1679 elif tarinfo.type not in SUPPORTED_TYPES:
1680 self.makeunknown(tarinfo, targetpath)
1681 else:
1682 self.makefile(tarinfo, targetpath)
1683
1684 self.chown(tarinfo, targetpath)
1685 if not tarinfo.issym():
1686 self.chmod(tarinfo, targetpath)
1687 self.utime(tarinfo, targetpath)
1688
1689 #--------------------------------------------------------------------------
1690 # Below are the different file methods. They are called via
1691 # _extract_member() when extract() is called. They can be replaced in a
1692 # subclass to implement other functionality.
1693
1694 def makedir(self, tarinfo, targetpath):
1695 """Make a directory called targetpath.
1696 """
1697 try:
1698 os.mkdir(targetpath)
1699 except EnvironmentError, e:
1700 if e.errno != errno.EEXIST:
1701 raise
1702
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is copied from the archive into a newly
       created file. Both file objects are closed again even if
       opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1711
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is extracted like a regular file
       and a debug message is issued.
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, " \
             "extracted as regular file." % tarinfo.type
    self._dbg(1, notice)
1719
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised
       if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001727
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
       ExtractError is raised if the os module lacks mknod()
       or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        devflag = stat.S_IFBLK
    else:
        devflag = stat.S_IFCHR
    mode = tarinfo.mode | devflag

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1742
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       The fallback first tries to extract the archive member the link
       refers to under the name targetpath; if that fails, the file named
       by the link is copied from the filesystem. IOError is raised if
       this copy fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Raised when the os module has no symlink()/link(); it is also
        # raised if tarinfo has no _link_target attribute, which is only
        # set by extract() for hard links.
        if tarinfo.issym():
            # Build the member name the symlink refers to, relative to
            # the directory of the link itself.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            # First choice: extract the referenced archive member
            # under the name of the link.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            # Second choice: copy the file from the filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001769
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is done
       unless the effective user id is 0. ExtractError is raised if
       the owner cannot be changed.
    """
    # We have to be root to do so.
    is_root = pwd and hasattr(os, "geteuid") and os.geteuid() == 0
    if not is_root:
        return

    # Group: by name first, then by id, else our own group.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # User: by name first, then by id, else our own user.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001797
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to targetpath.

       Silently do nothing when the os module has no chmod(). Raise
       ExtractError if os.chmod() fails with an EnvironmentError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001806
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

       Silently do nothing when the os module has no utime(), or for
       directories on win32. Raise ExtractError if os.utime() fails
       with an EnvironmentError.
    """
    if not hasattr(os, 'utime'):
        return

    on_windows = sys.platform == "win32"
    if on_windows and tarinfo.isdir():
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        return

    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001820
1821 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       A member stored in self.firstmember is handed out first and the
       slot is cleared. Otherwise header blocks are read starting at
       self.offset, and every member that is processed successfully is
       appended to self.members before it is returned.

       Raise ReadError when a HeaderError occurs while self.offset is 0
       and ignore_zeros is not set.
    """
    # Fails if the archive is closed or its mode is not one of "r"/"a".
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read: end of archive.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            # proc_member() advances self.offset past the member's data
            # and may return a different TarInfo object.
            tarinfo = self.proc_member(tarinfo)

        except HeaderError, e:
            if self.ignore_zeros:
                # Report the bad block at debug level 2, skip one block
                # and try again.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # A bad header while still at offset 0 is reported as an
                # error; anywhere else it is treated as the end of the
                # archive.
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1873
1874 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001875 # The following are methods that are called depending on the type of a
1876 # member. The entry point is proc_member() which is called with a TarInfo
1877 # object created from the header block from the current offset. The
1878 # proc_member() method can be overridden in a subclass to add custom
1879 # proc_*() methods. A proc_*() method MUST implement the following
1880 # operations:
1881 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1882 # if there is data that follows.
1883 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001884 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001885 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that handles its type.

       GNU longname/longlink members go to proc_gnulong(), GNU sparse
       members to proc_sparse(), everything else to proc_builtin().
       The chosen method's return value is passed through.
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    if kind == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    return self.proc_builtin(tarinfo)
1896
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type (which
       is treated like a regular file).

       Record the current offset as the start of the member's data and,
       if the member carries data blocks, move self.offset past them.
       Return tarinfo.
    """
    tarinfo.offset_data = self.offset
    carries_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if carries_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001906
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname or longlink member.

       The data blocks following tarinfo's header contain the long
       name; the header after them describes the member the name
       belongs to. That member is processed via proc_member(), patched
       with the long name (or long link name) and returned. Its offset
       is set to the offset of the longname header.
    """
    # Collect the payload block by block and join once at the end,
    # instead of growing a string with += on every iteration.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = buf.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = buf.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001935
1936 def proc_sparse(self, tarinfo):
Georg Brandl38c6a222006-05-10 16:26:03 +00001937 """Process a GNU sparse header plus extra headers.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001938 """
Georg Brandl38c6a222006-05-10 16:26:03 +00001939 buf = tarinfo.buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001940 sp = _ringbuffer()
1941 pos = 386
1942 lastpos = 0L
1943 realpos = 0L
1944 # There are 4 possible sparse structs in the
1945 # first header.
1946 for i in xrange(4):
1947 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001948 offset = nti(buf[pos:pos + 12])
1949 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001950 except ValueError:
1951 break
1952 if offset > lastpos:
1953 sp.append(_hole(lastpos, offset - lastpos))
1954 sp.append(_data(offset, numbytes, realpos))
1955 realpos += numbytes
1956 lastpos = offset + numbytes
1957 pos += 24
1958
1959 isextended = ord(buf[482])
Georg Brandl38c6a222006-05-10 16:26:03 +00001960 origsize = nti(buf[483:495])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001961
1962 # If the isextended flag is given,
1963 # there are extra headers to process.
1964 while isextended == 1:
1965 buf = self.fileobj.read(BLOCKSIZE)
1966 self.offset += BLOCKSIZE
1967 pos = 0
1968 for i in xrange(21):
1969 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001970 offset = nti(buf[pos:pos + 12])
1971 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001972 except ValueError:
1973 break
1974 if offset > lastpos:
1975 sp.append(_hole(lastpos, offset - lastpos))
1976 sp.append(_data(offset, numbytes, realpos))
1977 realpos += numbytes
1978 lastpos = offset + numbytes
1979 pos += 24
1980 isextended = ord(buf[504])
1981
1982 if lastpos < origsize:
1983 sp.append(_hole(lastpos, origsize - lastpos))
1984
1985 tarinfo.sparse = sp
1986
1987 tarinfo.offset_data = self.offset
1988 self.offset += self._block(tarinfo.size)
1989 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001990
Georg Brandl38c6a222006-05-10 16:26:03 +00001991 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001992
1993 #--------------------------------------------------------------------------
1994 # Little helper methods:
1995
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    whole, partial = divmod(count, BLOCKSIZE)
    if partial:
        # A started block counts as a full one.
        return (whole + 1) * BLOCKSIZE
    return whole * BLOCKSIZE
2004
def _getmember(self, name, tarinfo=None):
    """Return the last archive member called name, or None if there
       is none.

       The search runs backwards. If tarinfo is given, only members
       that precede it in the member list are considered.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        candidates = members
    else:
        candidates = members[:members.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002020
def _load(self):
    """Call next() until it returns None, so that every readable
       member has been visited, then mark the archive as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2030
def _check(self, mode=None):
    """Verify that the TarFile is still open and, if mode is given,
       that the TarFile's own mode is one of the characters in mode.
       Raise IOError otherwise.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002039
def __iter__(self):
    """Return an iterator over the archive's members.

       Once all members are loaded this is a plain iterator over
       self.members; before that a TarIter is returned.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2047
def _dbg(self, level, msg):
    """Print msg to sys.stderr if level does not exceed self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2053# class TarFile
2054
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator positioned before the first member
           of tarfile.
        """
        self.tarfile = tarfile
        self.index = 0
    def __iter__(self):
        """Return the iterator itself.
        """
        return self
    def next(self):
        """Return the next member, pulling it from TarFile.next() while
           the archive is not fully loaded. When TarFile.next() runs
           dry, flag the TarFile as _loaded and stop.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Once the
        # archive is loaded, continue from the member list by index.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2090
2091# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of size bytes that
       starts at offset.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size
    def __contains__(self, offset):
        # True when offset lies in [self.offset, self.offset + self.size).
        begin = self.offset
        return begin <= offset < begin + self.size
2100
class _data(_section):
    """A section of a sparse file that holds real data; realpos is
       stored alongside the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2107
class _hole(_section):
    """A section of a sparse file that is a hole. Adds nothing to
       _section; the class itself is the marker.
    """
2112
class _ringbuffer(list):
    """List that remembers where the last successful find() ended and
       starts the next search there, wrapping around at the end. This
       is meant to be faster than always scanning from the front.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0
    def find(self, offset):
        """Return the first item that contains offset, searching
           circularly from the last hit, or None if no item does.
        """
        if not self:
            # Nothing to search; without this guard self[idx] would
            # raise IndexError instead of reporting a miss.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2133
2134#---------------------------------------------
2135# zipfile compatible TarFile class
2136#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper that offers a TarFile through an interface modelled on
       the standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            # Give every member the attribute names zipfile uses.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        return [member.name for member in self.infolist()]
    def infolist(self):
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted for signature compatibility only.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        # Translate the zipfile-style attributes back to tar ones.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
2183#class TarFileCompat
2184
2185#--------------------
2186# exported functions
2187#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) as a tar
       archive by this module, False if that raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2198
# Expose TarFile.open as the module-level open(). From this assignment
# on, the name "open" in this module's namespace refers to TarFile.open
# and no longer to the builtin.
open = TarFile.open