blob: 94fdcb02768493621db98ec7325ac24cc4753ec6 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl2527f7f2006-10-29 09:16:15 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
Jack Jansencfc49022003-03-07 13:37:32 +000054if sys.platform == 'mac':
55 # This module needs work for MacOS9, especially in the area of pathname
56 # handling. In many places it is assumed a simple substitution of / by the
57 # local os.path.sep is good enough to convert pathnames, but this does not
58 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
59 raise ImportError, "tarfile does not work for platform==mac"
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
72NUL = "\0" # the null character
73BLOCKSIZE = 512 # length of processing blocks
74RECORDSIZE = BLOCKSIZE * 20 # length of records
75MAGIC = "ustar" # magic tar string
76VERSION = "00" # version number
77
78LENGTH_NAME = 100 # maximum length of a filename
79LENGTH_LINK = 100 # maximum length of a linkname
80LENGTH_PREFIX = 155 # maximum length of the prefix field
81MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
82
83REGTYPE = "0" # regular file
84AREGTYPE = "\0" # regular file
85LNKTYPE = "1" # link (inside tarfile)
86SYMTYPE = "2" # symbolic link
87CHRTYPE = "3" # character special device
88BLKTYPE = "4" # block special device
89DIRTYPE = "5" # directory
90FIFOTYPE = "6" # fifo special device
91CONTTYPE = "7" # contiguous file
92
93GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
94GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
95GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
96
97#---------------------------------------------------------
98# tarfile constants
99#---------------------------------------------------------
100SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
101 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
102 CONTTYPE, CHRTYPE, BLKTYPE,
103 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
104 GNUTYPE_SPARSE)
105
106REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
107 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
108
109#---------------------------------------------------------
110# Bits used in the mode field, values in octal.
111#---------------------------------------------------------
112S_IFLNK = 0120000 # symbolic link
113S_IFREG = 0100000 # regular file
114S_IFBLK = 0060000 # block device
115S_IFDIR = 0040000 # directory
116S_IFCHR = 0020000 # character device
117S_IFIFO = 0010000 # fifo
118
119TSUID = 04000 # set UID on execution
120TSGID = 02000 # set GID on execution
121TSVTX = 01000 # reserved
122
123TUREAD = 0400 # read by owner
124TUWRITE = 0200 # write by owner
125TUEXEC = 0100 # execute/search by owner
126TGREAD = 0040 # read by group
127TGWRITE = 0020 # write by group
128TGEXEC = 0010 # execute/search by group
129TOREAD = 0004 # read by other
130TOWRITE = 0002 # write by other
131TOEXEC = 0001 # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Return s as a fixed-width string field of exactly length
       characters: s is cut off if it is too long and padded with
       NUL characters if it is too short.
    """
    field = s[:length]
    # A negative repeat count yields "", so over-long input gets no padding.
    padding = NUL * (length - len(s))
    return field + padding
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
142def nti(s):
143 """Convert a number field to a python number.
144 """
145 # There are two possible encodings for a number field, see
146 # itn() below.
147 if s[0] != chr(0200):
Georg Brandl58bf57f2006-10-12 12:03:11 +0000148 n = int(s.rstrip(NUL + " ") or "0", 8)
Georg Brandl38c6a222006-05-10 16:26:03 +0000149 else:
150 n = 0L
151 for i in xrange(len(s) - 1):
152 n <<= 8
153 n += ord(s[i + 1])
154 return n
155
156def itn(n, digits=8, posix=False):
157 """Convert a python number to a number field.
158 """
159 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
160 # octal digits followed by a null-byte, this allows values up to
161 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
162 # that if necessary. A leading 0200 byte indicates this particular
163 # encoding, the following digits-1 bytes are a big-endian
164 # representation. This allows values up to (256**(digits-1))-1.
165 if 0 <= n < 8 ** (digits - 1):
166 s = "%0*o" % (digits - 1, n) + NUL
167 else:
168 if posix:
Georg Brandle4751e32006-05-18 06:11:19 +0000169 raise ValueError("overflow in number field")
Georg Brandl38c6a222006-05-10 16:26:03 +0000170
171 if n < 0:
172 # XXX We mimic GNU tar's behaviour with negative numbers,
173 # this could raise OverflowError.
174 n = struct.unpack("L", struct.pack("l", n))[0]
175
176 s = ""
177 for i in xrange(digits - 1):
178 s = chr(n & 0377) + s
179 n >>= 8
180 s = chr(0200) + s
181 return s
182
def calc_chksums(buf):
    """Return the tuple (unsigned_chksum, signed_chksum) for the
       512 byte header block at the start of buf.

       All bytes of the header are summed up, except for the 8 byte
       chksum field (offset 148) which is counted as if it was filled
       with spaces. According to the GNU tar sources, some tars (Sun
       and NeXT) sum up signed chars, which gives a different result
       as soon as a byte has its high bit set, hence both variants
       are calculated.
    """
    header = buf[:512]
    # 8 spaces in place of the chksum field contribute 8 * 32 = 256;
    # the "8x" pad bytes make struct skip the field itself.
    blanks = 256
    unsigned_chksum = blanks + sum(struct.unpack("148B8x356B", header))
    signed_chksum = blanks + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000195
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, everything up to the end of src is copied,
       if it is 0 nothing is done. IOError is raised if src ends
       before length bytes were copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    # First the chunks of full buffer size ...
    copied = 0
    while copied < full_chunks:
        data = src.read(BUFSIZE)
        if len(data) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(data)
        copied += 1

    # ... then whatever is left over.
    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
220
# Lookup table for filemode(). It holds one inner tuple per character
# of the resulting string, in output order. Each inner tuple lists
# (bit mask, character) pairs; filemode() takes the character of the
# first pair whose bits are all set in the mode, so combined masks
# have to be listed before the single bits they are made of.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner: read, write, execute/set-uid
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group: read, write, execute/set-gid
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other: read, write, execute/TSVTX
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000247
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def pick(table):
        # The first entry whose bits are all set in mode wins,
        # "-" stands for "none of them".
        for bit, char in table:
            if mode & bit == bit:
                return char
        return "-"

    return "".join([pick(table) for table in filemode_table])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000262
# normpath(path) normalizes path with os.path.normpath() and always
# returns it with "/" as the separator. Where os.sep already is "/"
# os.path.normpath() can be used directly, otherwise the platform's
# separator is replaced after normalizing.
if os.sep != "/":
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
else:
    normpath = os.path.normpath
267
# Exception hierarchy of this module: every exception below is
# derived from TarError.
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
283
284#---------------------------
285# internal stream interface
286#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name using os.open(). mode is either "r"
           (read only) or "w" (write only, the file is created or
           truncated); any other mode raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # Without an explicit permission argument os.open() creates
        # new files with 0777 (minus the umask), i.e. archives would
        # end up executable. Ask for rw-rw-rw- like the builtin
        # open() does; the umask is still applied by the OS.
        perm = (stat.S_IRUSR | stat.S_IWUSR |
                stat.S_IRGRP | stat.S_IWGRP |
                stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perm)

    def close(self):
        """Close the underlying file descriptor.
        """
        os.close(self.fd)

    def read(self, size):
        """Return at most size bytes read from the file descriptor.
        """
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string s to the file descriptor.
        """
        os.write(self.fd, s)
310
311class _Stream:
312 """Class that serves as an adapter between TarFile and
313 a stream-like object. The stream-like object only
314 needs to have a read() or write() method and is accessed
315 blockwise. Use of gzip or bzip2 compression is possible.
316 A stream-like object could be for example: sys.stdin,
317 sys.stdout, a socket, a tape device etc.
318
319 _Stream is intended to be used only internally.
320 """
321
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000322 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000323 """Construct a _Stream object.
324 """
325 self._extfileobj = True
326 if fileobj is None:
327 fileobj = _LowLevelFile(name, mode)
328 self._extfileobj = False
329
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000330 if comptype == '*':
331 # Enable transparent compression detection for the
332 # stream interface
333 fileobj = _StreamProxy(fileobj)
334 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000335
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000336 self.name = name or ""
337 self.mode = mode
338 self.comptype = comptype
339 self.fileobj = fileobj
340 self.bufsize = bufsize
341 self.buf = ""
342 self.pos = 0L
343 self.closed = False
344
345 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000346 try:
347 import zlib
348 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000349 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000350 self.zlib = zlib
351 self.crc = zlib.crc32("")
352 if mode == "r":
353 self._init_read_gz()
354 else:
355 self._init_write_gz()
356
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000357 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358 try:
359 import bz2
360 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000361 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000362 if mode == "r":
363 self.dbuf = ""
364 self.cmp = bz2.BZ2Decompressor()
365 else:
366 self.cmp = bz2.BZ2Compressor()
367
368 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000369 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000370 self.close()
371
372 def _init_write_gz(self):
373 """Initialize for writing with gzip compression.
374 """
375 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
376 -self.zlib.MAX_WBITS,
377 self.zlib.DEF_MEM_LEVEL,
378 0)
379 timestamp = struct.pack("<L", long(time.time()))
380 self.__write("\037\213\010\010%s\002\377" % timestamp)
381 if self.name.endswith(".gz"):
382 self.name = self.name[:-3]
383 self.__write(self.name + NUL)
384
385 def write(self, s):
386 """Write string s to the stream.
387 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000388 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000389 self.crc = self.zlib.crc32(s, self.crc)
390 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000391 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000392 s = self.cmp.compress(s)
393 self.__write(s)
394
395 def __write(self, s):
396 """Write string s to the stream if a whole new block
397 is ready to be written.
398 """
399 self.buf += s
400 while len(self.buf) > self.bufsize:
401 self.fileobj.write(self.buf[:self.bufsize])
402 self.buf = self.buf[self.bufsize:]
403
404 def close(self):
405 """Close the _Stream object. No operation should be
406 done on it afterwards.
407 """
408 if self.closed:
409 return
410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000412 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000413
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000415 self.fileobj.write(self.buf)
416 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000417 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000418 # The native zlib crc is an unsigned 32-bit integer, but
419 # the Python wrapper implicitly casts that to a signed C
420 # long. So, on a 32-bit box self.crc may "look negative",
421 # while the same crc on a 64-bit box may "look positive".
422 # To avoid irksome warnings from the `struct` module, force
423 # it to look positive on all boxes.
424 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000425 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 if not self._extfileobj:
428 self.fileobj.close()
429
430 self.closed = True
431
432 def _init_read_gz(self):
433 """Initialize for reading a gzip compressed fileobj.
434 """
435 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
436 self.dbuf = ""
437
438 # taken from gzip.GzipFile with some alterations
439 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000440 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000441 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000442 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000443
444 flag = ord(self.__read(1))
445 self.__read(6)
446
447 if flag & 4:
448 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
449 self.read(xlen)
450 if flag & 8:
451 while True:
452 s = self.__read(1)
453 if not s or s == NUL:
454 break
455 if flag & 16:
456 while True:
457 s = self.__read(1)
458 if not s or s == NUL:
459 break
460 if flag & 2:
461 self.__read(2)
462
463 def tell(self):
464 """Return the stream's file pointer position.
465 """
466 return self.pos
467
468 def seek(self, pos=0):
469 """Set the stream's file pointer to pos. Negative seeking
470 is forbidden.
471 """
472 if pos - self.pos >= 0:
473 blocks, remainder = divmod(pos - self.pos, self.bufsize)
474 for i in xrange(blocks):
475 self.read(self.bufsize)
476 self.read(remainder)
477 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000478 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479 return self.pos
480
481 def read(self, size=None):
482 """Return the next size number of bytes from the stream.
483 If size is not defined, return all bytes of the stream
484 up to EOF.
485 """
486 if size is None:
487 t = []
488 while True:
489 buf = self._read(self.bufsize)
490 if not buf:
491 break
492 t.append(buf)
493 buf = "".join(t)
494 else:
495 buf = self._read(size)
496 self.pos += len(buf)
497 return buf
498
499 def _read(self, size):
500 """Return size bytes from the stream.
501 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000502 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503 return self.__read(size)
504
505 c = len(self.dbuf)
506 t = [self.dbuf]
507 while c < size:
508 buf = self.__read(self.bufsize)
509 if not buf:
510 break
511 buf = self.cmp.decompress(buf)
512 t.append(buf)
513 c += len(buf)
514 t = "".join(t)
515 self.dbuf = t[size:]
516 return t[:size]
517
518 def __read(self, size):
519 """Return size bytes from stream. If internal buffer is empty,
520 read another block from the stream.
521 """
522 c = len(self.buf)
523 t = [self.buf]
524 while c < size:
525 buf = self.fileobj.read(self.bufsize)
526 if not buf:
527 break
528 t.append(buf)
529 c += len(buf)
530 t = "".join(t)
531 self.buf = t[size:]
532 return t[:size]
533# class _Stream
534
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block, which is kept in
           self.buf for the detection in getcomptype().
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead by __init__(),
           regardless of size. The method replaces itself with
           fileobj.read, so all following calls go directly to
           the wrapped file object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes
           found at the start of the first block.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit for the block
        # size (1-9) and the block magic "1AY&SY". The digit must not
        # be compared, otherwise only streams written with
        # compression level 9 are recognized.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class StreamProxy
558
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # number of compressed bytes read from fileobj at a time
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing (any
           other mode) of bzip2 compressed data.
        """
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)start at position 0 with a fresh (de)compressor. In
           read mode fileobj is rewound to its start as well.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of decompressed data. Less is
           returned only if the end of the data is reached.
        """
        chunks = [self.buf]
        have = len(self.buf)
        while have < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # fileobj is exhausted. Decompressing an empty string
                # never raises EOFError, so without this check a
                # truncated or empty file makes the loop spin forever.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # the end of the bzip2 stream was already reached
                break
            chunks.append(data)
            have += len(data)
        self.buf = "".join(chunks)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to position pos of the decompressed data. Going
           backwards is done by starting over from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current position in the uncompressed data.
        """
        return self.pos

    def write(self, data):
        """Compress data and write the result to fileobj.
        """
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush the compressor in write mode, then close fileobj.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
621
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000622#------------------------
623# Extraction file object
624#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj is the file object to read from, offset is where
           the data starts in fileobj and size is the length of the
           file that is provided. sparse is either None or an object
           whose find() method returns the section a position is in.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file. At most size bytes are returned,
           without size everything up to the end of the file.
        """
        remaining = self.size - self.position
        if size is None:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is not None:
            return self.readsparse(size)
        return self.readnormal(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        start = self.offset + self.position
        self.fileobj.seek(start)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        pieces = []
        while size > 0:
            piece = self.readsparsesection(size)
            if not piece:
                break
            pieces.append(piece)
            size -= len(piece)
        return "".join(pieces)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return ""

        # never read beyond the end of the current section
        section_end = section.offset + section.size
        size = min(size, section_end - self.position)

        if not isinstance(section, _data):
            # sections that are not backed by data read as NULs
            self.position += size
            return NUL * size

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += size
        return self.fileobj.read(size)
#class _FileInFile
699
700
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # number of bytes readline() reads ahead at a time
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """tarfile provides the file object of the archive, tarinfo
           describes the member (name, size, offset_data and an
           optional sparse attribute).
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # data that readline() read ahead but did not return yet
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Data left over from readline() has to be returned first.
        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Every negative size (not only -1) means "no limit". Used as
        # a slice bound, a negative value would cut the line at the
        # wrong end of the buffer.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position is
           limited to the range from 0 to the size of the file.
           Raises ValueError for an unknown whence.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # The read-ahead data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
828
829#------------------
830# Exported Classes
831#------------------
832class TarInfo(object):
833 """Informational class which holds the details about an
834 archive member given by a tar header block.
835 TarInfo objects are returned by TarFile.getmember(),
836 TarFile.getmembers() and TarFile.gettarinfo() and are
837 usually created internally.
838 """
839
840 def __init__(self, name=""):
841 """Construct a TarInfo object. name is the optional name
842 of the member.
843 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000844 self.name = name # member name (dirnames must end with '/')
845 self.mode = 0666 # file permissions
846 self.uid = 0 # user id
847 self.gid = 0 # group id
848 self.size = 0 # file size
849 self.mtime = 0 # modification time
850 self.chksum = 0 # header checksum
851 self.type = REGTYPE # member type
852 self.linkname = "" # link name
853 self.uname = "user" # user name
854 self.gname = "group" # group name
855 self.devmajor = 0 # device major number
856 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000857
Georg Brandl38c6a222006-05-10 16:26:03 +0000858 self.offset = 0 # the tar header starts here
859 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000860
861 def __repr__(self):
862 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
863
Guido van Rossum75b64e62005-01-16 00:16:11 +0000864 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000865 def frombuf(cls, buf):
866 """Construct a TarInfo object from a 512 byte string buffer.
867 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000868 if len(buf) != BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000869 raise ValueError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000870 if buf.count(NUL) == BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000871 raise ValueError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000872
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000874 tarinfo.buf = buf
Georg Brandle8953182006-05-27 14:02:03 +0000875 tarinfo.name = buf[0:100].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000876 tarinfo.mode = nti(buf[100:108])
877 tarinfo.uid = nti(buf[108:116])
878 tarinfo.gid = nti(buf[116:124])
879 tarinfo.size = nti(buf[124:136])
880 tarinfo.mtime = nti(buf[136:148])
881 tarinfo.chksum = nti(buf[148:156])
882 tarinfo.type = buf[156:157]
Georg Brandle8953182006-05-27 14:02:03 +0000883 tarinfo.linkname = buf[157:257].rstrip(NUL)
884 tarinfo.uname = buf[265:297].rstrip(NUL)
885 tarinfo.gname = buf[297:329].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000886 tarinfo.devmajor = nti(buf[329:337])
887 tarinfo.devminor = nti(buf[337:345])
Georg Brandl2527f7f2006-10-29 09:16:15 +0000888 prefix = buf[345:500].rstrip(NUL)
889
890 if prefix and not tarinfo.issparse():
891 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000892
Georg Brandl38c6a222006-05-10 16:26:03 +0000893 if tarinfo.chksum not in calc_chksums(buf):
Georg Brandle4751e32006-05-18 06:11:19 +0000894 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000895 return tarinfo
896
Georg Brandl38c6a222006-05-10 16:26:03 +0000897 def tobuf(self, posix=False):
Georg Brandl2527f7f2006-10-29 09:16:15 +0000898 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000899 """
Georg Brandl2527f7f2006-10-29 09:16:15 +0000900 buf = ""
901 type = self.type
902 prefix = ""
903
904 if self.name.endswith("/"):
905 type = DIRTYPE
906
Georg Brandl25f58f62006-12-06 22:21:23 +0000907 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
908 # Prevent "././@LongLink" from being normalized.
909 name = self.name
910 else:
911 name = normpath(self.name)
Georg Brandl2527f7f2006-10-29 09:16:15 +0000912
913 if type == DIRTYPE:
914 # directories should end with '/'
915 name += "/"
916
917 linkname = self.linkname
918 if linkname:
919 # if linkname is empty we end up with a '.'
920 linkname = normpath(linkname)
921
922 if posix:
923 if self.size > MAXSIZE_MEMBER:
924 raise ValueError("file is too large (>= 8 GB)")
925
926 if len(self.linkname) > LENGTH_LINK:
927 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
928
929 if len(name) > LENGTH_NAME:
930 prefix = name[:LENGTH_PREFIX + 1]
931 while prefix and prefix[-1] != "/":
932 prefix = prefix[:-1]
933
934 name = name[len(prefix):]
935 prefix = prefix[:-1]
936
937 if not prefix or len(name) > LENGTH_NAME:
938 raise ValueError("name is too long")
939
940 else:
941 if len(self.linkname) > LENGTH_LINK:
942 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
943
944 if len(name) > LENGTH_NAME:
945 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
946
Georg Brandl38c6a222006-05-10 16:26:03 +0000947 parts = [
Georg Brandl2527f7f2006-10-29 09:16:15 +0000948 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000949 itn(self.mode & 07777, 8, posix),
950 itn(self.uid, 8, posix),
951 itn(self.gid, 8, posix),
952 itn(self.size, 12, posix),
953 itn(self.mtime, 12, posix),
954 " ", # checksum field
Georg Brandl2527f7f2006-10-29 09:16:15 +0000955 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000956 stn(self.linkname, 100),
957 stn(MAGIC, 6),
958 stn(VERSION, 2),
959 stn(self.uname, 32),
960 stn(self.gname, 32),
961 itn(self.devmajor, 8, posix),
962 itn(self.devminor, 8, posix),
Georg Brandl2527f7f2006-10-29 09:16:15 +0000963 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000964 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000965
Lars Gustäbel8ff1f6a2007-04-21 12:20:09 +0000966 buf += "".join(parts).ljust(BLOCKSIZE, NUL)
Georg Brandl25f58f62006-12-06 22:21:23 +0000967 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Georg Brandl2527f7f2006-10-29 09:16:15 +0000968 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000969 self.buf = buf
970 return buf
971
def _create_gnulong(self, name, type):
    """Build a GNU longname/longlink pseudo member for `name'.

    The result is an extended tar header of the given `type' whose
    size field holds the length of the null terminated name, followed
    by the name itself padded with NULs to a multiple of BLOCKSIZE.
    """
    payload = name + NUL

    header = self.__class__()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # Number of NULs needed to reach the next block boundary (0 if the
    # payload already ends on one).
    padding = -len(payload) % BLOCKSIZE
    return header.tobuf() + payload + padding * NUL
994
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    member_type = self.type
    return member_type in REGULAR_TYPES
def isfile(self):
    """Alias of isreg()."""
    result = self.isreg()
    return result
def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    member_type = self.type
    return member_type == DIRTYPE
def issym(self):
    """Return True if the member's type is SYMTYPE."""
    member_type = self.type
    return member_type == SYMTYPE
def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    member_type = self.type
    return member_type == LNKTYPE
def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    member_type = self.type
    return member_type == CHRTYPE
def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    member_type = self.type
    return member_type == BLKTYPE
def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    member_type = self.type
    return member_type == FIFOTYPE
def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE."""
    member_type = self.type
    return member_type == GNUTYPE_SPARSE
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1015# class TarInfo
1016
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # The attributes below are class-level defaults; they can be
    # overridden on a subclass or on an individual instance.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, errors during extraction are only
                                # reported as debug messages. If > 0,
                                # EnvironmentErrors are passed to the caller
                                # as exceptions; if > 1, ExtractErrors are
                                # passed to the caller as well.

    posix = False               # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)

    fileobject = ExFileObject   # Class that extractfile() instantiates to
                                # build the file-like object it returns.
1037
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       ValueError is raised for any other `mode'. If reading the archive
       fails while it is being opened, a file that was opened here is
       closed again before the exception is passed on.
    """
    # Compare against the exact mode strings. A substring test against
    # "raw" would also accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(name, self.mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init datastructures
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        self.offset = self.fileobj.tell()
                            # current position in the archive file

        if self._mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self._mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                try:
                    tarinfo = self.next()
                except ReadError:
                    self.fileobj.seek(0)
                    break
                if tarinfo is None:
                    self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self._mode in ("a", "w"):
            self._loaded = True
    except:
        # Do not leak a file that was opened above; a caller-supplied
        # file object is left untouched. The exception is re-raised.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
1093
1094 #--------------------------------------------------------------------------
1095 # Below are the classmethods which act as alternate constructors to the
1096 # TarFile class. The open() method is the only one that is needed for
1097 # public use; it is the "super"-constructor and is able to select an
1098 # adequate "sub"-constructor for a particular compression using the mapping
1099 # from OPEN_METH.
1100 #
1101 # This concept allows one to subclass TarFile without losing the comfort of
1102 # the super-constructor. A sub-constructor is registered and made available
1103 # by adding it to the mapping in OPEN_METH.
1104
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' is not understood, CompressionError for an unknown
       compression type and ReadError if no registered method is able to
       open the archive for reading.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                # Remember the position so that the next candidate
                # starts reading at the same place.
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: a substring test against "rw" would let
        # "rw" through and set up a _Stream before the mode is rejected.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream was created here, so it is closed with the TarFile.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001176
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be 'r', 'a' or 'w', otherwise ValueError is raised.
    """
    # Exact comparison: a substring test against "raw" would also
    # accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1184
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError if `mode' is not 'r' or 'w', CompressionError
       if the gzip module cannot be used and ReadError if an IOError
       occurs while the archive is opened. A file that was opened here
       is closed again if opening the archive fails.
    """
    # Exact comparison: a substring test against "rw" would also
    # accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj))
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure (e.g. a ReadError from taropen) must not
        # leak the file opened above either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1209
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError if `mode' is not 'r' or 'w', CompressionError
       if the bz2 module is not available and ReadError if an IOError
       occurs while the archive is opened. A BZ2File that was opened
       here is closed again if opening the archive fails.
    """
    # Exact comparison: a substring test against "rw" would also
    # accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Only a BZ2File created above is closed; the proxy wraps a
        # caller-supplied file object, which is left open.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1234
# All *open() methods are registered here.
# Maps a compression type, as it appears after ':' in the mode string
# passed to open(), to the name of the classmethod that handles it.
# open() also iterates over this mapping to probe an archive that is
# opened with mode 'r' or 'r:*'.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1241
1242 #--------------------------------------------------------------------------
1243 # The public methods which TarFile provides:
1244
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       The archive is then padded with NULs to a multiple of RECORDSIZE.
       The underlying file object is closed unless it was supplied by
       the caller, and that also happens if writing the trailer fails.
       Calling close() on an already closed TarFile does nothing.
    """
    if self.closed:
        return

    try:
        if self._mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file even if the writes above raised, so that a
        # failed close() does not leak the file object.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1264
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001275
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list is only available after the whole archive has
    # been scanned once.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1285
def getnames(self):
    """Return the names of all members of the archive as a list, in the
       same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001291
1292 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1293 """Create a TarInfo object for either the file `name' or the file
1294 object `fileobj' (using os.fstat on its file descriptor). You can
1295 modify some of the TarInfo's attributes before you add it using
1296 addfile(). If given, `arcname' specifies an alternative name for the
1297 file in the archive.
1298 """
1299 self._check("aw")
1300
1301 # When fileobj is given, replace name by
1302 # fileobj's real name.
1303 if fileobj is not None:
1304 name = fileobj.name
1305
1306 # Building the name of the member in the archive.
1307 # Backward slashes are converted to forward slashes,
1308 # Absolute paths are turned to relative paths.
1309 if arcname is None:
1310 arcname = name
1311 arcname = normpath(arcname)
1312 drv, arcname = os.path.splitdrive(arcname)
1313 while arcname[0:1] == "/":
1314 arcname = arcname[1:]
1315
1316 # Now, fill the TarInfo object with
1317 # information specific for the file.
1318 tarinfo = TarInfo()
1319
1320 # Use os.stat or os.lstat, depending on platform
1321 # and if symlinks shall be resolved.
1322 if fileobj is None:
1323 if hasattr(os, "lstat") and not self.dereference:
1324 statres = os.lstat(name)
1325 else:
1326 statres = os.stat(name)
1327 else:
1328 statres = os.fstat(fileobj.fileno())
1329 linkname = ""
1330
1331 stmd = statres.st_mode
1332 if stat.S_ISREG(stmd):
1333 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001334 if not self.dereference and \
1335 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001336 # Is it a hardlink to an already
1337 # archived file?
1338 type = LNKTYPE
1339 linkname = self.inodes[inode]
1340 else:
1341 # The inode is added only if its valid.
1342 # For win32 it is always 0.
1343 type = REGTYPE
1344 if inode[0]:
1345 self.inodes[inode] = arcname
1346 elif stat.S_ISDIR(stmd):
1347 type = DIRTYPE
1348 if arcname[-1:] != "/":
1349 arcname += "/"
1350 elif stat.S_ISFIFO(stmd):
1351 type = FIFOTYPE
1352 elif stat.S_ISLNK(stmd):
1353 type = SYMTYPE
1354 linkname = os.readlink(name)
1355 elif stat.S_ISCHR(stmd):
1356 type = CHRTYPE
1357 elif stat.S_ISBLK(stmd):
1358 type = BLKTYPE
1359 else:
1360 return None
1361
1362 # Fill the TarInfo object with all
1363 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001364 tarinfo.name = arcname
1365 tarinfo.mode = stmd
1366 tarinfo.uid = statres.st_uid
1367 tarinfo.gid = statres.st_gid
1368 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001369 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001370 else:
1371 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001372 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001373 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001374 tarinfo.linkname = linkname
1375 if pwd:
1376 try:
1377 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1378 except KeyError:
1379 pass
1380 if grp:
1381 try:
1382 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1383 except KeyError:
1384 pass
1385
1386 if type in (CHRTYPE, BLKTYPE):
1387 if hasattr(os, "major") and hasattr(os, "minor"):
1388 tarinfo.devmajor = os.major(statres.st_rdev)
1389 tarinfo.devminor = os.minor(statres.st_rdev)
1390 return tarinfo
1391
1392 def list(self, verbose=True):
1393 """Print a table of contents to sys.stdout. If `verbose' is False, only
1394 the names of the members are printed. If it is True, an `ls -l'-like
1395 output is produced.
1396 """
1397 self._check()
1398
1399 for tarinfo in self:
1400 if verbose:
1401 print filemode(tarinfo.mode),
1402 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1403 tarinfo.gname or tarinfo.gid),
1404 if tarinfo.ischr() or tarinfo.isblk():
1405 print "%10s" % ("%d,%d" \
1406 % (tarinfo.devmajor, tarinfo.devminor)),
1407 else:
1408 print "%10d" % tarinfo.size,
1409 print "%d-%02d-%02d %02d:%02d:%02d" \
1410 % time.localtime(tarinfo.mtime)[:6],
1411
1412 print tarinfo.name,
1413
1414 if verbose:
1415 if tarinfo.issym():
1416 print "->", tarinfo.linkname,
1417 if tarinfo.islnk():
1418 print "link to", tarinfo.linkname,
1419 print
1420
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.
       The archive file itself and files of an unsupported type are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
            return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing to the archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1471
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad it with NULs up to
    # the next block boundary.
    if fileobj is not None:
        size = member.size
        copyfileobj(fileobj, self.fileobj, size)
        padding = -size % BLOCKSIZE
        if padding > 0:
            self.fileobj.write(NUL * padding)
        self.offset += size + padding

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001498 def extractall(self, path=".", members=None):
1499 """Extract all members from the archive to the current working
1500 directory and set owner, modification time and permissions on
1501 directories afterwards. `path' specifies a different directory
1502 to extract to. `members' is optional and must be a subset of the
1503 list returned by getmembers().
1504 """
1505 directories = []
1506
1507 if members is None:
1508 members = self
1509
1510 for tarinfo in members:
1511 if tarinfo.isdir():
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001512 # Extract directories with a safe mode.
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001513 directories.append(tarinfo)
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001514 tarinfo = copy.copy(tarinfo)
1515 tarinfo.mode = 0700
1516 self.extract(tarinfo, path)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001517
1518 # Reverse sort directories.
1519 directories.sort(lambda a, b: cmp(a.name, b.name))
1520 directories.reverse()
1521
1522 # Set correct owner, mtime and filemode on directories.
1523 for tarinfo in directories:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001524 dirpath = os.path.join(path, tarinfo.name)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001525 try:
Lars Gustäbele5f9e582008-01-04 14:44:23 +00001526 self.chown(tarinfo, dirpath)
1527 self.utime(tarinfo, dirpath)
1528 self.chmod(tarinfo, dirpath)
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001529 except ExtractError, e:
1530 if self.errorlevel > 1:
1531 raise
1532 else:
1533 self._dbg(1, "tarfile: %s" % e)
1534
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001535 def extract(self, member, path=""):
1536 """Extract a member from the archive to the current working directory,
1537 using its full name. Its file information is extracted as accurately
1538 as possible. `member' may be a filename or a TarInfo object. You can
1539 specify a different directory using `path'.
1540 """
1541 self._check("r")
1542
1543 if isinstance(member, TarInfo):
1544 tarinfo = member
1545 else:
1546 tarinfo = self.getmember(member)
1547
Neal Norwitza4f651a2004-07-20 22:07:44 +00001548 # Prepare the link target for makelink().
1549 if tarinfo.islnk():
1550 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1551
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001552 try:
1553 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1554 except EnvironmentError, e:
1555 if self.errorlevel > 0:
1556 raise
1557 else:
1558 if e.filename is None:
1559 self._dbg(1, "tarfile: %s" % e.strerror)
1560 else:
1561 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1562 except ExtractError, e:
1563 if self.errorlevel > 1:
1564 raise
1565 else:
1566 self._dbg(1, "tarfile: %s" % e)
1567
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member if isinstance(member, TarInfo) else self.getmember(member)

    # Regular files, and members of an unknown type (which are treated
    # as regular files), get a file object of their own.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1606
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.
    """
    # Build the destination pathname: drop one trailing slash and
    # convert to the platform specific form.
    if targetpath.endswith("/"):
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories. Directories that are not part of
    # the archive are created with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Pick the make*() method that matches the member's type. The
    # order of the tests is significant and must be kept.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and modification time are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1649
1650 #--------------------------------------------------------------------------
1651 # Below are the different file methods. They are called via
1652 # _extract_member() when extract() is called. They can be replaced in a
1653 # subclass to implement other functionality.
1654
1655 def makedir(self, tarinfo, targetpath):
1656 """Make a directory called targetpath.
1657 """
1658 try:
Lars Gustäbel42993fe2008-02-05 12:00:20 +00001659 # Use a safe mode for the directory, the real mode is set
1660 # later in _extract_member().
1661 os.mkdir(targetpath, 0700)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001662 except EnvironmentError, e:
1663 if e.errno != errno.EEXIST:
1664 raise
1665
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       tarinfo. Both the source and the target file object are closed
       even if creating the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1674
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is extracted like a regular file and
       a debug message is issued.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
1682
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised if the
       platform has no os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001690
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
       ExtractError is raised if the platform lacks os.mknod or
       os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching device type flag.
    if tarinfo.isblk():
        device_flag = stat.S_IFBLK
    else:
        device_flag = stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)

    os.mknod(targetpath, tarinfo.mode | device_flag, device)
1705
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       IOError is raised if neither the link nor a copy of the
       referenced file can be created.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Presumably raised when os.symlink/os.link is missing on this
        # platform, or when tarinfo has no _link_target attribute
        # (extract() sets it for hard links).
        if tarinfo.issym():
            # The target of a symbolic link is resolved relative to the
            # directory of the link member inside the archive.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            # First choice: extract the archive member the link refers
            # to under the link's own name.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            # Second choice: copy a file of that name from the
            # filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001732
1733 def chown(self, tarinfo, targetpath):
1734 """Set owner of targetpath according to tarinfo.
1735 """
1736 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1737 # We have to be root to do so.
1738 try:
1739 g = grp.getgrnam(tarinfo.gname)[2]
1740 except KeyError:
1741 try:
1742 g = grp.getgrgid(tarinfo.gid)[2]
1743 except KeyError:
1744 g = os.getgid()
1745 try:
1746 u = pwd.getpwnam(tarinfo.uname)[2]
1747 except KeyError:
1748 try:
1749 u = pwd.getpwuid(tarinfo.uid)[2]
1750 except KeyError:
1751 u = os.getuid()
1752 try:
1753 if tarinfo.issym() and hasattr(os, "lchown"):
1754 os.lchown(targetpath, u, g)
1755 else:
Andrew MacIntyre7970d202003-02-19 12:51:34 +00001756 if sys.platform != "os2emx":
1757 os.chown(targetpath, u, g)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001758 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00001759 raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001760
1761 def chmod(self, tarinfo, targetpath):
1762 """Set file permissions of targetpath according to tarinfo.
1763 """
Jack Jansen834eff62003-03-07 12:47:06 +00001764 if hasattr(os, 'chmod'):
1765 try:
1766 os.chmod(targetpath, tarinfo.mode)
1767 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00001768 raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001769
1770 def utime(self, tarinfo, targetpath):
1771 """Set modification time of targetpath according to tarinfo.
1772 """
Jack Jansen834eff62003-03-07 12:47:06 +00001773 if not hasattr(os, 'utime'):
Tim Petersf9347782003-03-07 15:36:41 +00001774 return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001775 if sys.platform == "win32" and tarinfo.isdir():
1776 # According to msdn.microsoft.com, it is an error (EACCES)
1777 # to use utime() on directories.
1778 return
1779 try:
1780 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1781 except EnvironmentError, e:
Georg Brandle4751e32006-05-18 06:11:19 +00001782 raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001783
1784 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001785 def next(self):
1786 """Return the next member of the archive as a TarInfo object, when
1787 TarFile is opened for reading. Return None if there is no more
1788 available.
1789 """
1790 self._check("ra")
1791 if self.firstmember is not None:
1792 m = self.firstmember
1793 self.firstmember = None
1794 return m
1795
1796 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001797 self.fileobj.seek(self.offset)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001798 while True:
1799 buf = self.fileobj.read(BLOCKSIZE)
1800 if not buf:
1801 return None
Georg Brandl38c6a222006-05-10 16:26:03 +00001802
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001803 try:
1804 tarinfo = TarInfo.frombuf(buf)
Tim Peters8a299d22006-05-19 19:16:34 +00001805
Georg Brandl38c6a222006-05-10 16:26:03 +00001806 # Set the TarInfo object's offset to the current position of the
1807 # TarFile and set self.offset to the position where the data blocks
1808 # should begin.
1809 tarinfo.offset = self.offset
1810 self.offset += BLOCKSIZE
1811
1812 tarinfo = self.proc_member(tarinfo)
1813
1814 except ValueError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001815 if self.ignore_zeros:
Georg Brandle4751e32006-05-18 06:11:19 +00001816 self._dbg(2, "0x%X: empty or invalid block: %s" %
1817 (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001818 self.offset += BLOCKSIZE
1819 continue
1820 else:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001821 if self.offset == 0:
Georg Brandle4751e32006-05-18 06:11:19 +00001822 raise ReadError("empty, unreadable or compressed "
1823 "file: %s" % e)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001824 return None
1825 break
1826
Georg Brandl38c6a222006-05-10 16:26:03 +00001827 # Some old tar programs represent a directory as a regular
1828 # file with a trailing slash.
1829 if tarinfo.isreg() and tarinfo.name.endswith("/"):
1830 tarinfo.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001831
Georg Brandl38c6a222006-05-10 16:26:03 +00001832 # Directory names should have a '/' at the end.
Lars Gustäbeld2201442007-04-20 14:49:02 +00001833 if tarinfo.isdir() and not tarinfo.name.endswith("/"):
Georg Brandl38c6a222006-05-10 16:26:03 +00001834 tarinfo.name += "/"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001835
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001836 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001837 return tarinfo
1838
1839 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001840 # The following are methods that are called depending on the type of a
1841 # member. The entry point is proc_member() which is called with a TarInfo
1842 # object created from the header block from the current offset. The
1843 # proc_member() method can be overridden in a subclass to add custom
1844 # proc_*() methods. A proc_*() method MUST implement the following
1845 # operations:
1846 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1847 # if there is data that follows.
1848 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001849 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001850 # 3. Return tarinfo or another valid TarInfo object.
1851 def proc_member(self, tarinfo):
1852 """Choose the right processing method for tarinfo depending
1853 on its type and call it.
1854 """
1855 if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1856 return self.proc_gnulong(tarinfo)
1857 elif tarinfo.type == GNUTYPE_SPARSE:
1858 return self.proc_sparse(tarinfo)
1859 else:
1860 return self.proc_builtin(tarinfo)
1861
1862 def proc_builtin(self, tarinfo):
1863 """Process a builtin type member or an unknown member
1864 which will be treated as a regular file.
1865 """
1866 tarinfo.offset_data = self.offset
1867 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
1868 # Skip the following data blocks.
1869 self.offset += self._block(tarinfo.size)
1870 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001871
1872 def proc_gnulong(self, tarinfo):
Georg Brandl38c6a222006-05-10 16:26:03 +00001873 """Process the blocks that hold a GNU longname
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001874 or longlink member.
1875 """
1876 buf = ""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001877 count = tarinfo.size
1878 while count > 0:
1879 block = self.fileobj.read(BLOCKSIZE)
1880 buf += block
1881 self.offset += BLOCKSIZE
1882 count -= BLOCKSIZE
1883
Georg Brandl38c6a222006-05-10 16:26:03 +00001884 # Fetch the next header and process it.
1885 b = self.fileobj.read(BLOCKSIZE)
1886 t = TarInfo.frombuf(b)
1887 t.offset = self.offset
1888 self.offset += BLOCKSIZE
1889 next = self.proc_member(t)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001890
Georg Brandl38c6a222006-05-10 16:26:03 +00001891 # Patch the TarInfo object from the next header with
1892 # the longname information.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001893 next.offset = tarinfo.offset
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001894 if tarinfo.type == GNUTYPE_LONGNAME:
Georg Brandle8953182006-05-27 14:02:03 +00001895 next.name = buf.rstrip(NUL)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001896 elif tarinfo.type == GNUTYPE_LONGLINK:
Georg Brandle8953182006-05-27 14:02:03 +00001897 next.linkname = buf.rstrip(NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001898
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001899 return next
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001900
1901 def proc_sparse(self, tarinfo):
Georg Brandl38c6a222006-05-10 16:26:03 +00001902 """Process a GNU sparse header plus extra headers.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001903 """
Georg Brandl38c6a222006-05-10 16:26:03 +00001904 buf = tarinfo.buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001905 sp = _ringbuffer()
1906 pos = 386
1907 lastpos = 0L
1908 realpos = 0L
1909 # There are 4 possible sparse structs in the
1910 # first header.
1911 for i in xrange(4):
1912 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001913 offset = nti(buf[pos:pos + 12])
1914 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001915 except ValueError:
1916 break
1917 if offset > lastpos:
1918 sp.append(_hole(lastpos, offset - lastpos))
1919 sp.append(_data(offset, numbytes, realpos))
1920 realpos += numbytes
1921 lastpos = offset + numbytes
1922 pos += 24
1923
1924 isextended = ord(buf[482])
Georg Brandl38c6a222006-05-10 16:26:03 +00001925 origsize = nti(buf[483:495])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001926
1927 # If the isextended flag is given,
1928 # there are extra headers to process.
1929 while isextended == 1:
1930 buf = self.fileobj.read(BLOCKSIZE)
1931 self.offset += BLOCKSIZE
1932 pos = 0
1933 for i in xrange(21):
1934 try:
Georg Brandl38c6a222006-05-10 16:26:03 +00001935 offset = nti(buf[pos:pos + 12])
1936 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001937 except ValueError:
1938 break
1939 if offset > lastpos:
1940 sp.append(_hole(lastpos, offset - lastpos))
1941 sp.append(_data(offset, numbytes, realpos))
1942 realpos += numbytes
1943 lastpos = offset + numbytes
1944 pos += 24
1945 isextended = ord(buf[504])
1946
1947 if lastpos < origsize:
1948 sp.append(_hole(lastpos, origsize - lastpos))
1949
1950 tarinfo.sparse = sp
1951
1952 tarinfo.offset_data = self.offset
1953 self.offset += self._block(tarinfo.size)
1954 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001955
Georg Brandl38c6a222006-05-10 16:26:03 +00001956 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001957
1958 #--------------------------------------------------------------------------
1959 # Little helper methods:
1960
1961 def _block(self, count):
1962 """Round up a byte count by BLOCKSIZE and return it,
1963 e.g. _block(834) => 1024.
1964 """
1965 blocks, remainder = divmod(count, BLOCKSIZE)
1966 if remainder:
1967 blocks += 1
1968 return blocks * BLOCKSIZE
1969
1970 def _getmember(self, name, tarinfo=None):
1971 """Find an archive member by name from bottom to top.
1972 If tarinfo is given, it is used as the starting point.
1973 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001974 # Ensure that all members have been loaded.
1975 members = self.getmembers()
1976
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001977 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001978 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001979 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001980 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001981
1982 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001983 if name == members[i].name:
1984 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001985
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001986 def _load(self):
1987 """Read through the entire archive file and look for readable
1988 members.
1989 """
1990 while True:
1991 tarinfo = self.next()
1992 if tarinfo is None:
1993 break
1994 self._loaded = True
1995
1996 def _check(self, mode=None):
1997 """Check if TarFile is still open, and if the operation's mode
1998 corresponds to TarFile's mode.
1999 """
2000 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002001 raise IOError("%s is closed" % self.__class__.__name__)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002002 if mode is not None and self._mode not in mode:
Georg Brandle4751e32006-05-18 06:11:19 +00002003 raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002004
2005 def __iter__(self):
2006 """Provide an iterator object.
2007 """
2008 if self._loaded:
2009 return iter(self.members)
2010 else:
2011 return TarIter(self)
2012
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002013 def _dbg(self, level, msg):
2014 """Write debugging output to sys.stderr.
2015 """
2016 if level <= self.debug:
2017 print >> sys.stderr, msg
2018# class TarFile
2019
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object bound to tarfile.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def next(self):
        """Return the next member, reading it through TarFile.next()
           while the archive is not fully loaded. When all members have
           been read, set TarFile as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        # Once the archive is loaded, serve members by index instead.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2055
2056# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole: a section of a sparse file that
       starts at offset and is size bytes long.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # True if offset falls inside [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
2065
class _data(_section):
    """Represent a data section in a sparse file. In addition to the
       section's offset and size it keeps realpos, the position value
       handed in by the caller.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2072
class _hole(_section):
    """Represent a hole section in a sparse file. Adds nothing to
       _section; the class itself marks the section as a hole.
    """
2077
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts scanning at the position of the previous hit
       (self.idx) and wraps around, so consecutive lookups of nearby
       offsets do not rescan from the beginning.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item, scanning from self.idx with wrap-around,
           for which "offset in item" is true, and remember its position.
           Return None if no item matches or the buffer is empty.
        """
        size = len(self)
        if not size:
            # Nothing to scan; indexing below would raise IndexError.
            return None

        start = self.idx
        idx = start
        while True:
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
            idx += 1
            if idx == size:
                idx = 0
            if idx == start:
                # End of File
                return None
2098
2099#---------------------------------------------
2100# zipfile compatible TarFile class
2101#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzip-compressed
           (TAR_GZIPPED) tar archive. Raises ValueError for any other
           compression constant.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the zipfile-style attribute names too.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of all members reported by infolist()."""
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing of the archive via TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """No integrity test is performed; always returns None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name."""
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname.
           compress_type is accepted but ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, described by zinfo.
           zinfo's filename, file_size and date_time are copied onto the
           name, size and mtime attributes before it is added.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
2149
2150#--------------------
2151# exported functions
2152#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # "open" is looked up at call time; this module rebinds it to
    # TarFile.open further down, so this is not the builtin open().
    try:
        open(name).close()
    except TarError:
        return False
    return True
2163
# Export TarFile.open under the module-level name "open". This rebinds
# "open" for the whole module, so is_tarfile() above calls TarFile.open
# rather than the builtin.
open = TarFile.open