blob: cffde457f14157c1f8f061e2a34d79ff7fab8583 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Georg Brandl38c6a222006-05-10 16:26:03 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl2527f7f2006-10-29 09:16:15 +000052import copy
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    # Fail the import up front on that platform.
    raise ImportError("tarfile does not work for platform==mac")
60
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
66# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = "\0"                      # the null character (field padding/terminator)
BLOCKSIZE = 512                 # length of processing blocks, in bytes
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file
96
97#---------------------------------------------------------
98# tarfile constants
99#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that somehow represent regular files.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)
108
109#---------------------------------------------------------
110# Bits used in the mode field, values in octal.
111#---------------------------------------------------------
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (shown as 't'/'T' by filemode())

# Ordinary permission bits.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
132
133#---------------------------------------------------------
134# Some useful functions
135#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000136
def stn(s, length):
    """Convert a python string to a fixed-width, NUL-padded field.

       The string is cut down to at most length characters and then
       filled up on the right with NUL characters to that width.
    """
    return s[:length].ljust(length, NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000141
def nti(s):
    """Convert a number field to a python number.

       Two encodings are understood (see itn()): a string of octal
       digits padded with NULs/spaces, and the GNU extension where a
       leading 0200 byte is followed by a big-endian binary value.
    """
    if s[0] == "\200":
        # GNU encoding: fold the remaining bytes, most significant first.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # Plain octal field; a field consisting only of padding counts as 0.
    return int(s.rstrip(NUL + " ") or "0", 8)
155
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field of digits characters.

       POSIX 1003.1-1988 requires numbers to be encoded as a string of
       octal digits followed by a null-byte, which allows values up to
       (8**(digits-1))-1. GNU tar allows storing greater numbers: a
       leading 0200 byte marks that encoding and the following
       digits-1 bytes are a big-endian representation, allowing values
       up to (256**(digits-1))-1.

       Raises ValueError if posix is true and n does not fit into the
       octal representation.
    """
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the low-order bytes first, then flip them to big-endian.
    octets = []
    for _ in range(width):
        octets.append(chr(n & 0xff))
        n >>= 8
    octets.reverse()
    return "\200" + "".join(octets)
182
def calc_chksums(buf):
    """Calculate the checksum for a member's header.

       All characters of the first 512 bytes are summed up, except for
       the 8-byte chksum field (bytes 148-155), which is treated as if
       it was filled with spaces (hence the constant 256 == 8 * 32).
       According to the GNU tar sources, some tars (Sun and NeXT)
       calculate chksum with signed char, which will be different if
       there are chars in the buffer with the high bit set. So both
       variants are returned as (unsigned_chksum, signed_chksum).
    """
    header = buf[:512]
    # "8x" skips the chksum field itself.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000195
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       could be copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means src is truncated.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    whole, tail = divmod(length, BUFSIZE)
    while whole > 0:
        transfer(BUFSIZE)
        whole -= 1
    if tail != 0:
        transfer(tail)
    return
220
# Lookup table for filemode(): one inner tuple per character of the
# resulting string. Each inner tuple lists (bits, char) alternatives in
# priority order; the first entry whose bits are all set in the mode
# supplies the character.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner permissions
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group permissions
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # permissions for others
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000247
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def pick(alternatives):
        # The first alternative whose bits are all set wins; "-" if none.
        for bit, char in alternatives:
            if mode & bit == bit:
                return char
        return "-"

    return "".join([pick(alternatives) for alternatives in filemode_table])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000262
# Member names inside an archive always use "/" as separator, so on
# platforms with a different os.sep the normalized path is converted.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
267
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
283
284#---------------------------
285# internal stream interface
286#---------------------------
287class _LowLevelFile:
288 """Low-level file object. Supports reading and writing.
289 It is used instead of a regular file object for streaming
290 access.
291 """
292
293 def __init__(self, name, mode):
294 mode = {
295 "r": os.O_RDONLY,
296 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
297 }[mode]
298 if hasattr(os, "O_BINARY"):
299 mode |= os.O_BINARY
300 self.fd = os.open(name, mode)
301
302 def close(self):
303 os.close(self.fd)
304
305 def read(self, size):
306 return os.read(self.fd, size)
307
308 def write(self, s):
309 os.write(self.fd, s)
310
311class _Stream:
312 """Class that serves as an adapter between TarFile and
313 a stream-like object. The stream-like object only
314 needs to have a read() or write() method and is accessed
315 blockwise. Use of gzip or bzip2 compression is possible.
316 A stream-like object could be for example: sys.stdin,
317 sys.stdout, a socket, a tape device etc.
318
319 _Stream is intended to be used only internally.
320 """
321
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000322 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000323 """Construct a _Stream object.
324 """
325 self._extfileobj = True
326 if fileobj is None:
327 fileobj = _LowLevelFile(name, mode)
328 self._extfileobj = False
329
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000330 if comptype == '*':
331 # Enable transparent compression detection for the
332 # stream interface
333 fileobj = _StreamProxy(fileobj)
334 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000335
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000336 self.name = name or ""
337 self.mode = mode
338 self.comptype = comptype
339 self.fileobj = fileobj
340 self.bufsize = bufsize
341 self.buf = ""
342 self.pos = 0L
343 self.closed = False
344
345 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000346 try:
347 import zlib
348 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000349 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000350 self.zlib = zlib
351 self.crc = zlib.crc32("")
352 if mode == "r":
353 self._init_read_gz()
354 else:
355 self._init_write_gz()
356
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000357 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358 try:
359 import bz2
360 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000361 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000362 if mode == "r":
363 self.dbuf = ""
364 self.cmp = bz2.BZ2Decompressor()
365 else:
366 self.cmp = bz2.BZ2Compressor()
367
368 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000369 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000370 self.close()
371
372 def _init_write_gz(self):
373 """Initialize for writing with gzip compression.
374 """
375 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
376 -self.zlib.MAX_WBITS,
377 self.zlib.DEF_MEM_LEVEL,
378 0)
379 timestamp = struct.pack("<L", long(time.time()))
380 self.__write("\037\213\010\010%s\002\377" % timestamp)
381 if self.name.endswith(".gz"):
382 self.name = self.name[:-3]
383 self.__write(self.name + NUL)
384
385 def write(self, s):
386 """Write string s to the stream.
387 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000388 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000389 self.crc = self.zlib.crc32(s, self.crc)
390 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000391 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000392 s = self.cmp.compress(s)
393 self.__write(s)
394
395 def __write(self, s):
396 """Write string s to the stream if a whole new block
397 is ready to be written.
398 """
399 self.buf += s
400 while len(self.buf) > self.bufsize:
401 self.fileobj.write(self.buf[:self.bufsize])
402 self.buf = self.buf[self.bufsize:]
403
404 def close(self):
405 """Close the _Stream object. No operation should be
406 done on it afterwards.
407 """
408 if self.closed:
409 return
410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000412 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000413
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000415 self.fileobj.write(self.buf)
416 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000417 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000418 # The native zlib crc is an unsigned 32-bit integer, but
419 # the Python wrapper implicitly casts that to a signed C
420 # long. So, on a 32-bit box self.crc may "look negative",
421 # while the same crc on a 64-bit box may "look positive".
422 # To avoid irksome warnings from the `struct` module, force
423 # it to look positive on all boxes.
424 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000425 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 if not self._extfileobj:
428 self.fileobj.close()
429
430 self.closed = True
431
432 def _init_read_gz(self):
433 """Initialize for reading a gzip compressed fileobj.
434 """
435 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
436 self.dbuf = ""
437
438 # taken from gzip.GzipFile with some alterations
439 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000440 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000441 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000442 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000443
444 flag = ord(self.__read(1))
445 self.__read(6)
446
447 if flag & 4:
448 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
449 self.read(xlen)
450 if flag & 8:
451 while True:
452 s = self.__read(1)
453 if not s or s == NUL:
454 break
455 if flag & 16:
456 while True:
457 s = self.__read(1)
458 if not s or s == NUL:
459 break
460 if flag & 2:
461 self.__read(2)
462
463 def tell(self):
464 """Return the stream's file pointer position.
465 """
466 return self.pos
467
468 def seek(self, pos=0):
469 """Set the stream's file pointer to pos. Negative seeking
470 is forbidden.
471 """
472 if pos - self.pos >= 0:
473 blocks, remainder = divmod(pos - self.pos, self.bufsize)
474 for i in xrange(blocks):
475 self.read(self.bufsize)
476 self.read(remainder)
477 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000478 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479 return self.pos
480
481 def read(self, size=None):
482 """Return the next size number of bytes from the stream.
483 If size is not defined, return all bytes of the stream
484 up to EOF.
485 """
486 if size is None:
487 t = []
488 while True:
489 buf = self._read(self.bufsize)
490 if not buf:
491 break
492 t.append(buf)
493 buf = "".join(t)
494 else:
495 buf = self._read(size)
496 self.pos += len(buf)
497 return buf
498
499 def _read(self, size):
500 """Return size bytes from the stream.
501 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000502 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503 return self.__read(size)
504
505 c = len(self.dbuf)
506 t = [self.dbuf]
507 while c < size:
508 buf = self.__read(self.bufsize)
509 if not buf:
510 break
511 buf = self.cmp.decompress(buf)
512 t.append(buf)
513 c += len(buf)
514 t = "".join(t)
515 self.dbuf = t[size:]
516 return t[:size]
517
518 def __read(self, size):
519 """Return size bytes from stream. If internal buffer is empty,
520 read another block from the stream.
521 """
522 c = len(self.buf)
523 t = [self.buf]
524 while c < size:
525 buf = self.fileobj.read(self.bufsize)
526 if not buf:
527 break
528 t.append(buf)
529 c += len(buf)
530 t = "".join(t)
531 self.buf = t[size:]
532 return t[:size]
533# class _Stream
534
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000535class _StreamProxy(object):
536 """Small proxy class that enables transparent compression
537 detection for the Stream interface (mode 'r|*').
538 """
539
540 def __init__(self, fileobj):
541 self.fileobj = fileobj
542 self.buf = self.fileobj.read(BLOCKSIZE)
543
544 def read(self, size):
545 self.read = self.fileobj.read
546 return self.buf
547
548 def getcomptype(self):
549 if self.buf.startswith("\037\213\010"):
550 return "gz"
551 if self.buf.startswith("BZh91"):
552 return "bz2"
553 return "tar"
554
555 def close(self):
556 self.fileobj.close()
557# class StreamProxy
558
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000559class _BZ2Proxy(object):
560 """Small proxy class that enables external file object
561 support for "r:bz2" and "w:bz2" modes. This is actually
562 a workaround for a limitation in bz2 module's BZ2File
563 class which (unlike gzip.GzipFile) has no support for
564 a file object argument.
565 """
566
567 blocksize = 16 * 1024
568
569 def __init__(self, fileobj, mode):
570 self.fileobj = fileobj
571 self.mode = mode
572 self.init()
573
574 def init(self):
575 import bz2
576 self.pos = 0
577 if self.mode == "r":
578 self.bz2obj = bz2.BZ2Decompressor()
579 self.fileobj.seek(0)
580 self.buf = ""
581 else:
582 self.bz2obj = bz2.BZ2Compressor()
583
584 def read(self, size):
585 b = [self.buf]
586 x = len(self.buf)
587 while x < size:
588 try:
589 raw = self.fileobj.read(self.blocksize)
590 data = self.bz2obj.decompress(raw)
591 b.append(data)
592 except EOFError:
593 break
594 x += len(data)
595 self.buf = "".join(b)
596
597 buf = self.buf[:size]
598 self.buf = self.buf[size:]
599 self.pos += len(buf)
600 return buf
601
602 def seek(self, pos):
603 if pos < self.pos:
604 self.init()
605 self.read(pos - self.pos)
606
607 def tell(self):
608 return self.pos
609
610 def write(self, data):
611 self.pos += len(data)
612 raw = self.bz2obj.compress(data)
613 self.fileobj.write(raw)
614
615 def close(self):
616 if self.mode == "w":
617 raw = self.bz2obj.flush()
618 self.fileobj.write(raw)
Georg Brandle8953182006-05-27 14:02:03 +0000619 self.fileobj.close()
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000620# class _BZ2Proxy
621
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000622#------------------------
623# Extraction file object
624#------------------------
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000625class _FileInFile(object):
626 """A thin wrapper around an existing file object that
627 provides a part of its data as an individual file
628 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000629 """
630
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000631 def __init__(self, fileobj, offset, size, sparse=None):
632 self.fileobj = fileobj
633 self.offset = offset
634 self.size = size
635 self.sparse = sparse
636 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000637
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000638 def tell(self):
639 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000640 """
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000641 return self.position
642
643 def seek(self, position):
644 """Seek to a position in the file.
645 """
646 self.position = position
647
648 def read(self, size=None):
649 """Read data from the file.
650 """
651 if size is None:
652 size = self.size - self.position
653 else:
654 size = min(size, self.size - self.position)
655
656 if self.sparse is None:
657 return self.readnormal(size)
658 else:
659 return self.readsparse(size)
660
661 def readnormal(self, size):
662 """Read operation for regular files.
663 """
664 self.fileobj.seek(self.offset + self.position)
665 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000666 return self.fileobj.read(size)
667
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000668 def readsparse(self, size):
669 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000670 """
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000671 data = []
672 while size > 0:
673 buf = self.readsparsesection(size)
674 if not buf:
675 break
676 size -= len(buf)
677 data.append(buf)
678 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000679
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000680 def readsparsesection(self, size):
681 """Read a single section of a sparse file.
682 """
683 section = self.sparse.find(self.position)
684
685 if section is None:
686 return ""
687
688 size = min(size, section.offset + section.size - self.position)
689
690 if isinstance(section, _data):
691 realpos = section.realpos + self.position - section.offset
692 self.fileobj.seek(self.offset + realpos)
693 self.position += size
694 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000695 else:
Lars Gustäbelaedb92e2006-12-23 16:51:47 +0000696 self.position += size
697 return NUL * size
698#class _FileInFile
699
700
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() fetches at a time while looking
    # for the end of a line.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a read-only file object for the member described by
           tarinfo, whose data lives in tarfile.fileobj.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() has read ahead but not handed out yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Serve read-ahead data first, then go to the underlying file.
        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. A negative size
           (or None) means no limit.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line. Any negative value
        # must not be used as a slice index, it would cut characters
        # off the end of the buffer instead.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position is
           clamped to the range 0..size. Raises ValueError for an
           unknown whence.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data belongs to the old position, drop it.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
828
829#------------------
830# Exported Classes
831#------------------
832class TarInfo(object):
833 """Informational class which holds the details about an
834 archive member given by a tar header block.
835 TarInfo objects are returned by TarFile.getmember(),
836 TarFile.getmembers() and TarFile.gettarinfo() and are
837 usually created internally.
838 """
839
840 def __init__(self, name=""):
841 """Construct a TarInfo object. name is the optional name
842 of the member.
843 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000844 self.name = name # member name (dirnames must end with '/')
845 self.mode = 0666 # file permissions
846 self.uid = 0 # user id
847 self.gid = 0 # group id
848 self.size = 0 # file size
849 self.mtime = 0 # modification time
850 self.chksum = 0 # header checksum
851 self.type = REGTYPE # member type
852 self.linkname = "" # link name
853 self.uname = "user" # user name
854 self.gname = "group" # group name
855 self.devmajor = 0 # device major number
856 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000857
Georg Brandl38c6a222006-05-10 16:26:03 +0000858 self.offset = 0 # the tar header starts here
859 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000860
861 def __repr__(self):
862 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
863
Guido van Rossum75b64e62005-01-16 00:16:11 +0000864 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000865 def frombuf(cls, buf):
866 """Construct a TarInfo object from a 512 byte string buffer.
867 """
Georg Brandl38c6a222006-05-10 16:26:03 +0000868 if len(buf) != BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000869 raise ValueError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000870 if buf.count(NUL) == BLOCKSIZE:
Georg Brandle4751e32006-05-18 06:11:19 +0000871 raise ValueError("empty header")
Georg Brandl38c6a222006-05-10 16:26:03 +0000872
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 tarinfo = cls()
Georg Brandl38c6a222006-05-10 16:26:03 +0000874 tarinfo.buf = buf
Georg Brandle8953182006-05-27 14:02:03 +0000875 tarinfo.name = buf[0:100].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000876 tarinfo.mode = nti(buf[100:108])
877 tarinfo.uid = nti(buf[108:116])
878 tarinfo.gid = nti(buf[116:124])
879 tarinfo.size = nti(buf[124:136])
880 tarinfo.mtime = nti(buf[136:148])
881 tarinfo.chksum = nti(buf[148:156])
882 tarinfo.type = buf[156:157]
Georg Brandle8953182006-05-27 14:02:03 +0000883 tarinfo.linkname = buf[157:257].rstrip(NUL)
884 tarinfo.uname = buf[265:297].rstrip(NUL)
885 tarinfo.gname = buf[297:329].rstrip(NUL)
Georg Brandl38c6a222006-05-10 16:26:03 +0000886 tarinfo.devmajor = nti(buf[329:337])
887 tarinfo.devminor = nti(buf[337:345])
Georg Brandl2527f7f2006-10-29 09:16:15 +0000888 prefix = buf[345:500].rstrip(NUL)
889
890 if prefix and not tarinfo.issparse():
891 tarinfo.name = prefix + "/" + tarinfo.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000892
Georg Brandl38c6a222006-05-10 16:26:03 +0000893 if tarinfo.chksum not in calc_chksums(buf):
Georg Brandle4751e32006-05-18 06:11:19 +0000894 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000895 return tarinfo
896
Georg Brandl38c6a222006-05-10 16:26:03 +0000897 def tobuf(self, posix=False):
Georg Brandl2527f7f2006-10-29 09:16:15 +0000898 """Return a tar header as a string of 512 byte blocks.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000899 """
Georg Brandl2527f7f2006-10-29 09:16:15 +0000900 buf = ""
901 type = self.type
902 prefix = ""
903
904 if self.name.endswith("/"):
905 type = DIRTYPE
906
Georg Brandl25f58f62006-12-06 22:21:23 +0000907 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
908 # Prevent "././@LongLink" from being normalized.
909 name = self.name
910 else:
911 name = normpath(self.name)
Georg Brandl2527f7f2006-10-29 09:16:15 +0000912
913 if type == DIRTYPE:
914 # directories should end with '/'
915 name += "/"
916
917 linkname = self.linkname
918 if linkname:
919 # if linkname is empty we end up with a '.'
920 linkname = normpath(linkname)
921
922 if posix:
923 if self.size > MAXSIZE_MEMBER:
924 raise ValueError("file is too large (>= 8 GB)")
925
926 if len(self.linkname) > LENGTH_LINK:
927 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
928
929 if len(name) > LENGTH_NAME:
930 prefix = name[:LENGTH_PREFIX + 1]
931 while prefix and prefix[-1] != "/":
932 prefix = prefix[:-1]
933
934 name = name[len(prefix):]
935 prefix = prefix[:-1]
936
937 if not prefix or len(name) > LENGTH_NAME:
938 raise ValueError("name is too long")
939
940 else:
941 if len(self.linkname) > LENGTH_LINK:
942 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
943
944 if len(name) > LENGTH_NAME:
945 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
946
Georg Brandl38c6a222006-05-10 16:26:03 +0000947 parts = [
Georg Brandl2527f7f2006-10-29 09:16:15 +0000948 stn(name, 100),
Georg Brandl38c6a222006-05-10 16:26:03 +0000949 itn(self.mode & 07777, 8, posix),
950 itn(self.uid, 8, posix),
951 itn(self.gid, 8, posix),
952 itn(self.size, 12, posix),
953 itn(self.mtime, 12, posix),
954 " ", # checksum field
Georg Brandl2527f7f2006-10-29 09:16:15 +0000955 type,
Georg Brandl38c6a222006-05-10 16:26:03 +0000956 stn(self.linkname, 100),
957 stn(MAGIC, 6),
958 stn(VERSION, 2),
959 stn(self.uname, 32),
960 stn(self.gname, 32),
961 itn(self.devmajor, 8, posix),
962 itn(self.devminor, 8, posix),
Georg Brandl2527f7f2006-10-29 09:16:15 +0000963 stn(prefix, 155)
Georg Brandl38c6a222006-05-10 16:26:03 +0000964 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000965
Georg Brandl2527f7f2006-10-29 09:16:15 +0000966 buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
Georg Brandl25f58f62006-12-06 22:21:23 +0000967 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Georg Brandl2527f7f2006-10-29 09:16:15 +0000968 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000969 self.buf = buf
970 return buf
971
def _create_gnulong(self, name, type):
    """Build a GNU longname/longlink record for `name'.

       The record is an extended tar header whose size field holds
       the length of the NUL terminated name, followed by the name
       itself padded with NULs to a multiple of BLOCKSIZE.
    """
    payload = name + NUL

    # The extension header is an ordinary member with a fixed name.
    header = self.__class__()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    pieces = [header.tobuf(), payload]
    leftover = len(payload) % BLOCKSIZE
    if leftover > 0:
        # Fill the last data block up with NULs.
        pieces.append((BLOCKSIZE - leftover) * NUL)
    return "".join(pieces)
994
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES.
    """
    return self.type in REGULAR_TYPES
def isfile(self):
    """Alias for isreg().
    """
    regular = self.isreg()
    return regular
def isdir(self):
    """Return True if the member's type is DIRTYPE.
    """
    return self.type == DIRTYPE
def issym(self):
    """Return True if the member's type is SYMTYPE.
    """
    return self.type == SYMTYPE
def islnk(self):
    """Return True if the member's type is LNKTYPE.
    """
    return self.type == LNKTYPE
def ischr(self):
    """Return True if the member's type is CHRTYPE.
    """
    return self.type == CHRTYPE
def isblk(self):
    """Return True if the member's type is BLKTYPE.
    """
    return self.type == BLKTYPE
def isfifo(self):
    """Return True if the member's type is FIFOTYPE.
    """
    return self.type == FIFOTYPE
def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE.
    """
    return self.type == GNUTYPE_SPARSE
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or
       FIFOTYPE.
    """
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1015# class TarInfo
1016
1017class TarFile(object):
1018 """The TarFile Class provides an interface to tar archives.
1019 """
1020
# Verbosity of debug messages; may be set from 0 (no messages) to
# 3 (all messages).
debug = 0

# If true, add the content of a linked file to the tar file, else
# the link itself.
dereference = False

# If true, skip empty or invalid blocks and continue processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (which needs
# debug > 0, as debug == 0 means no messages). If > 0, errors are
# passed to the caller as exceptions.
errorlevel = 0

# If True, generate POSIX.1-1990-compliant archives (no GNU
# extensions!).
posix = False

# Called by extractfile() as fileobject(tarfile, tarinfo) to build
# the file-like object for a member.
fileobject = ExFileObject
1038 def __init__(self, name=None, mode="r", fileobj=None):
1039 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1040 read from an existing archive, 'a' to append data to an existing
1041 file or 'w' to create a new file overwriting an existing one. `mode'
1042 defaults to 'r'.
1043 If `fileobj' is given, it is used for reading or writing data. If it
1044 can be determined, `mode' is overridden by `fileobj's mode.
1045 `fileobj' is not closed, when TarFile is closed.
1046 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001047 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001048
1049 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001050 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001051 self._mode = mode
1052 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1053
1054 if not fileobj:
1055 fileobj = file(self.name, self.mode)
1056 self._extfileobj = False
1057 else:
1058 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +00001059 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001060 if hasattr(fileobj, "mode"):
1061 self.mode = fileobj.mode
1062 self._extfileobj = True
1063 self.fileobj = fileobj
1064
1065 # Init datastructures
Georg Brandl38c6a222006-05-10 16:26:03 +00001066 self.closed = False
1067 self.members = [] # list of members as TarInfo objects
1068 self._loaded = False # flag if all members have been read
1069 self.offset = 0L # current position in the archive file
1070 self.inodes = {} # dictionary caching the inodes of
1071 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001072
1073 if self._mode == "r":
1074 self.firstmember = None
1075 self.firstmember = self.next()
1076
1077 if self._mode == "a":
1078 # Move to the end of the archive,
1079 # before the first empty block.
1080 self.firstmember = None
1081 while True:
1082 try:
1083 tarinfo = self.next()
1084 except ReadError:
1085 self.fileobj.seek(0)
1086 break
1087 if tarinfo is None:
1088 self.fileobj.seek(- BLOCKSIZE, 1)
1089 break
1090
1091 if self._mode in "aw":
1092 self._loaded = True
1093
1094 #--------------------------------------------------------------------------
1095 # Below are the classmethods which act as alternate constructors to the
1096 # TarFile class. The open() method is the only one that is needed for
1097 # public use; it is the "super"-constructor and is able to select an
1098 # adequate "sub"-constructor for a particular compression using the mapping
1099 # from OPEN_METH.
1100 #
1101 # This concept allows one to subclass TarFile without losing the comfort of
1102 # the super-constructor. A sub-constructor is registered and made available
1103 # by adding it to the mapping in OPEN_METH.
1104
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or
       if `mode' cannot be interpreted, CompressionError for an
       unknown compression type and ReadError if no registered
       *open() method could read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                # A failed attempt has consumed data from a caller
                # supplied file object; rewind it so that the next
                # candidate starts at the same position.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream wrapper is ours, so close() must close it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        # Compare against the two valid modes explicitly; a substring
        # test would also let "" and "aw" through.
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001172
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open the uncompressed tar archive `name' by calling the class
       itself. Raises ValueError unless `mode' is 'r', 'a' or 'w'.
    """
    mode_ok = len(mode) <= 1 and mode in "raw"
    if not mode_ok:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1180
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError
       for a mode other than 'r' or 'w', CompressionError if the gzip
       module cannot be used and ReadError if an IOError occurs while
       opening the archive.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    if name is None:
        # Only a file object was passed; there is no filename to
        # derive the name of the tar archive from.
        tarname = None
    else:
        # "x.tgz" -> "x.tar", "x.tar.gz" -> "x.tar"
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        # Do not leak a file that was opened in this method.
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    t._extfileobj = False
    return t
1217
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError
       for a mode other than 'r' or 'w', CompressionError if the bz2
       module is missing and ReadError if an IOError occurs while
       opening the archive.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if name is None:
        # Only a file object was passed; there is no filename to
        # derive the name of the tar archive from.
        tarname = None
    else:
        # "x.tbz2" -> "x.tar", "x.tar.bz2" -> "x.tar"
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        # Do not leak a file that was opened in this method.
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    t._extfileobj = False
    return t
1250
# Registry of the *open() methods: maps a compression type to the
# name of the classmethod that opens such an archive.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
}
1257
1258 #--------------------------------------------------------------------------
1259 # The public methods which TarFile provides:
1260
def close(self):
    """Close the TarFile; closing it again has no effect. In write
       or append mode two zero blocks are written first and the
       archive is then filled with NULs up to a multiple of
       RECORDSIZE. The underlying file object is only closed if it
       was not supplied by the caller.
    """
    if not self.closed:
        if self._mode in "aw":
            trailer = NUL * (BLOCKSIZE * 2)
            self.fileobj.write(trailer)
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            overhang = self.offset % RECORDSIZE
            if overhang > 0:
                self.fileobj.write(NUL * (RECORDSIZE - overhang))

        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1280
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001291
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been
    # scanned once.
    scan_pending = not self._loaded
    if scan_pending:
        self._load()
    return self.members
1301
def getnames(self):
    """Return the names of all members as a list, in the order of the
       list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001307
1308 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1309 """Create a TarInfo object for either the file `name' or the file
1310 object `fileobj' (using os.fstat on its file descriptor). You can
1311 modify some of the TarInfo's attributes before you add it using
1312 addfile(). If given, `arcname' specifies an alternative name for the
1313 file in the archive.
1314 """
1315 self._check("aw")
1316
1317 # When fileobj is given, replace name by
1318 # fileobj's real name.
1319 if fileobj is not None:
1320 name = fileobj.name
1321
1322 # Building the name of the member in the archive.
1323 # Backward slashes are converted to forward slashes,
1324 # Absolute paths are turned to relative paths.
1325 if arcname is None:
1326 arcname = name
1327 arcname = normpath(arcname)
1328 drv, arcname = os.path.splitdrive(arcname)
1329 while arcname[0:1] == "/":
1330 arcname = arcname[1:]
1331
1332 # Now, fill the TarInfo object with
1333 # information specific for the file.
1334 tarinfo = TarInfo()
1335
1336 # Use os.stat or os.lstat, depending on platform
1337 # and if symlinks shall be resolved.
1338 if fileobj is None:
1339 if hasattr(os, "lstat") and not self.dereference:
1340 statres = os.lstat(name)
1341 else:
1342 statres = os.stat(name)
1343 else:
1344 statres = os.fstat(fileobj.fileno())
1345 linkname = ""
1346
1347 stmd = statres.st_mode
1348 if stat.S_ISREG(stmd):
1349 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001350 if not self.dereference and \
1351 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001352 # Is it a hardlink to an already
1353 # archived file?
1354 type = LNKTYPE
1355 linkname = self.inodes[inode]
1356 else:
1357 # The inode is added only if its valid.
1358 # For win32 it is always 0.
1359 type = REGTYPE
1360 if inode[0]:
1361 self.inodes[inode] = arcname
1362 elif stat.S_ISDIR(stmd):
1363 type = DIRTYPE
1364 if arcname[-1:] != "/":
1365 arcname += "/"
1366 elif stat.S_ISFIFO(stmd):
1367 type = FIFOTYPE
1368 elif stat.S_ISLNK(stmd):
1369 type = SYMTYPE
1370 linkname = os.readlink(name)
1371 elif stat.S_ISCHR(stmd):
1372 type = CHRTYPE
1373 elif stat.S_ISBLK(stmd):
1374 type = BLKTYPE
1375 else:
1376 return None
1377
1378 # Fill the TarInfo object with all
1379 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001380 tarinfo.name = arcname
1381 tarinfo.mode = stmd
1382 tarinfo.uid = statres.st_uid
1383 tarinfo.gid = statres.st_gid
1384 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001385 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001386 else:
1387 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001388 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001389 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001390 tarinfo.linkname = linkname
1391 if pwd:
1392 try:
1393 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1394 except KeyError:
1395 pass
1396 if grp:
1397 try:
1398 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1399 except KeyError:
1400 pass
1401
1402 if type in (CHRTYPE, BLKTYPE):
1403 if hasattr(os, "major") and hasattr(os, "minor"):
1404 tarinfo.devmajor = os.major(statres.st_rdev)
1405 tarinfo.devminor = os.minor(statres.st_rdev)
1406 return tarinfo
1407
1408 def list(self, verbose=True):
1409 """Print a table of contents to sys.stdout. If `verbose' is False, only
1410 the names of the members are printed. If it is True, an `ls -l'-like
1411 output is produced.
1412 """
1413 self._check()
1414
1415 for tarinfo in self:
1416 if verbose:
1417 print filemode(tarinfo.mode),
1418 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1419 tarinfo.gname or tarinfo.gid),
1420 if tarinfo.ischr() or tarinfo.isblk():
1421 print "%10s" % ("%d,%d" \
1422 % (tarinfo.devmajor, tarinfo.devminor)),
1423 else:
1424 print "%10d" % tarinfo.size,
1425 print "%d-%02d-%02d %02d:%02d:%02d" \
1426 % time.localtime(tarinfo.mtime)[:6],
1427
1428 print tarinfo.name,
1429
1430 if verbose:
1431 if tarinfo.issym():
1432 print "->", tarinfo.linkname,
1433 if tarinfo.islnk():
1434 print "link to", tarinfo.linkname,
1435 print
1436
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if addfile() fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1488
def addfile(self, tarinfo, fileobj=None):
    """Append a copy of the TarInfo object `tarinfo' to the archive.
       If `fileobj' is given, tarinfo.size bytes are copied from it
       into the archive and padded with NULs to a full block.
       TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, so the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        nblocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001514
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001515 def extractall(self, path=".", members=None):
1516 """Extract all members from the archive to the current working
1517 directory and set owner, modification time and permissions on
1518 directories afterwards. `path' specifies a different directory
1519 to extract to. `members' is optional and must be a subset of the
1520 list returned by getmembers().
1521 """
1522 directories = []
1523
1524 if members is None:
1525 members = self
1526
1527 for tarinfo in members:
1528 if tarinfo.isdir():
1529 # Extract directory with a safe mode, so that
1530 # all files below can be extracted as well.
1531 try:
1532 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1533 except EnvironmentError:
1534 pass
1535 directories.append(tarinfo)
1536 else:
1537 self.extract(tarinfo, path)
1538
1539 # Reverse sort directories.
1540 directories.sort(lambda a, b: cmp(a.name, b.name))
1541 directories.reverse()
1542
1543 # Set correct owner, mtime and filemode on directories.
1544 for tarinfo in directories:
1545 path = os.path.join(path, tarinfo.name)
1546 try:
1547 self.chown(tarinfo, path)
1548 self.utime(tarinfo, path)
1549 self.chmod(tarinfo, path)
1550 except ExtractError, e:
1551 if self.errorlevel > 1:
1552 raise
1553 else:
1554 self._dbg(1, "tarfile: %s" % e)
1555
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001556 def extract(self, member, path=""):
1557 """Extract a member from the archive to the current working directory,
1558 using its full name. Its file information is extracted as accurately
1559 as possible. `member' may be a filename or a TarInfo object. You can
1560 specify a different directory using `path'.
1561 """
1562 self._check("r")
1563
1564 if isinstance(member, TarInfo):
1565 tarinfo = member
1566 else:
1567 tarinfo = self.getmember(member)
1568
Neal Norwitza4f651a2004-07-20 22:07:44 +00001569 # Prepare the link target for makelink().
1570 if tarinfo.islnk():
1571 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1572
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001573 try:
1574 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1575 except EnvironmentError, e:
1576 if self.errorlevel > 0:
1577 raise
1578 else:
1579 if e.filename is None:
1580 self._dbg(1, "tarfile: %s" % e.strerror)
1581 else:
1582 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1583 except ExtractError, e:
1584 if self.errorlevel > 1:
1585 raise
1586 else:
1587 self._dbg(1, "tarfile: %s" % e)
1588
def extractfile(self, member):
    """Return a file object for `member', which may be a filename or
       a TarInfo object. A regular file, or a member of a type that
       is not in SUPPORTED_TYPES, yields a self.fileobject instance;
       a link yields the file object of the link's target; for
       everything else None is returned.
       Raises StreamError for a link if the archive is read from a
       _Stream.
    """
    self._check("r")

    tarinfo = member
    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type, which are treated
    # as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1627
1628 def _extract_member(self, tarinfo, targetpath):
1629 """Extract the TarInfo object tarinfo to a physical
1630 file called targetpath.
1631 """
1632 # Fetch the TarInfo object for the given name
1633 # and build the destination pathname, replacing
1634 # forward slashes to platform specific separators.
1635 if targetpath[-1:] == "/":
1636 targetpath = targetpath[:-1]
1637 targetpath = os.path.normpath(targetpath)
1638
1639 # Create all upper directories.
1640 upperdirs = os.path.dirname(targetpath)
1641 if upperdirs and not os.path.exists(upperdirs):
1642 ti = TarInfo()
1643 ti.name = upperdirs
1644 ti.type = DIRTYPE
1645 ti.mode = 0777
1646 ti.mtime = tarinfo.mtime
1647 ti.uid = tarinfo.uid
1648 ti.gid = tarinfo.gid
1649 ti.uname = tarinfo.uname
1650 ti.gname = tarinfo.gname
1651 try:
1652 self._extract_member(ti, ti.name)
1653 except:
1654 pass
1655
1656 if tarinfo.islnk() or tarinfo.issym():
1657 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1658 else:
1659 self._dbg(1, tarinfo.name)
1660
1661 if tarinfo.isreg():
1662 self.makefile(tarinfo, targetpath)
1663 elif tarinfo.isdir():
1664 self.makedir(tarinfo, targetpath)
1665 elif tarinfo.isfifo():
1666 self.makefifo(tarinfo, targetpath)
1667 elif tarinfo.ischr() or tarinfo.isblk():
1668 self.makedev(tarinfo, targetpath)
1669 elif tarinfo.islnk() or tarinfo.issym():
1670 self.makelink(tarinfo, targetpath)
1671 elif tarinfo.type not in SUPPORTED_TYPES:
1672 self.makeunknown(tarinfo, targetpath)
1673 else:
1674 self.makefile(tarinfo, targetpath)
1675
1676 self.chown(tarinfo, targetpath)
1677 if not tarinfo.issym():
1678 self.chmod(tarinfo, targetpath)
1679 self.utime(tarinfo, targetpath)
1680
1681 #--------------------------------------------------------------------------
1682 # Below are the different file methods. They are called via
1683 # _extract_member() when extract() is called. They can be replaced in a
1684 # subclass to implement other functionality.
1685
1686 def makedir(self, tarinfo, targetpath):
1687 """Make a directory called targetpath.
1688 """
1689 try:
1690 os.mkdir(targetpath)
1691 except EnvironmentError, e:
1692 if e.errno != errno.EEXIST:
1693 raise
1694
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data that
       extractfile() returns for tarinfo.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            # Close the target even if copying fails.
            target.close()
    finally:
        # Close the source even if the target cannot be opened.
        source.close()
1703
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath like a regular
       file and report that in a debug message.
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, notice % tarinfo.type)
1711
def makefifo(self, tarinfo, targetpath):
    """Create the fifo targetpath. Raises ExtractError if the os
       module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001719
def makedev(self, tarinfo, targetpath):
    """Create the character or block device targetpath. Raises
       ExtractError if the os module lacks mknod() or makedev().
    """
    supported = hasattr(os, "mknod") and hasattr(os, "makedev")
    if not supported:
        raise ExtractError("special devices not supported by system")

    # Everything that is not a block device becomes a char device.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1734
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link targetpath. If that raises
       AttributeError (the os module has no such function), the
       referenced member is extracted to targetpath instead; if that
       fails as well, the referenced file is copied from the
       filesystem. Raises IOError if the copy fails too.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # A symlink target is relative to the link's directory.
            source = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                           source))

        try:
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            source = os.path.normpath(source)
            try:
                shutil.copy2(source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001761
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is done
       unless the effective user id is 0. Raises ExtractError if the
       owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Prefer the symbolic name, then the numeric id, then our own id.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001789
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to targetpath.

    Silently does nothing when the os module has no chmod(); raises
    ExtractError if the call itself fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001798
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

    Skipped when the os module has no utime(), and for directories on
    win32.  Raises ExtractError if the call fails.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if not hasattr(os, 'utime') or (sys.platform == "win32" and
                                    tarinfo.isdir()):
        return

    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001812
1813 #--------------------------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001814 def next(self):
1815 """Return the next member of the archive as a TarInfo object, when
1816 TarFile is opened for reading. Return None if there is no more
1817 available.
1818 """
1819 self._check("ra")
1820 if self.firstmember is not None:
1821 m = self.firstmember
1822 self.firstmember = None
1823 return m
1824
1825 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001826 self.fileobj.seek(self.offset)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001827 while True:
1828 buf = self.fileobj.read(BLOCKSIZE)
1829 if not buf:
1830 return None
Georg Brandl38c6a222006-05-10 16:26:03 +00001831
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001832 try:
1833 tarinfo = TarInfo.frombuf(buf)
Tim Peters8a299d22006-05-19 19:16:34 +00001834
Georg Brandl38c6a222006-05-10 16:26:03 +00001835 # Set the TarInfo object's offset to the current position of the
1836 # TarFile and set self.offset to the position where the data blocks
1837 # should begin.
1838 tarinfo.offset = self.offset
1839 self.offset += BLOCKSIZE
1840
1841 tarinfo = self.proc_member(tarinfo)
1842
1843 except ValueError, e:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001844 if self.ignore_zeros:
Georg Brandle4751e32006-05-18 06:11:19 +00001845 self._dbg(2, "0x%X: empty or invalid block: %s" %
1846 (self.offset, e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001847 self.offset += BLOCKSIZE
1848 continue
1849 else:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001850 if self.offset == 0:
Georg Brandle4751e32006-05-18 06:11:19 +00001851 raise ReadError("empty, unreadable or compressed "
1852 "file: %s" % e)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001853 return None
1854 break
1855
Georg Brandl38c6a222006-05-10 16:26:03 +00001856 # Some old tar programs represent a directory as a regular
1857 # file with a trailing slash.
1858 if tarinfo.isreg() and tarinfo.name.endswith("/"):
1859 tarinfo.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001860
Georg Brandl38c6a222006-05-10 16:26:03 +00001861 # Directory names should have a '/' at the end.
1862 if tarinfo.isdir():
1863 tarinfo.name += "/"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001864
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001865 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001866 return tarinfo
1867
1868 #--------------------------------------------------------------------------
Georg Brandl38c6a222006-05-10 16:26:03 +00001869 # The following are methods that are called depending on the type of a
1870 # member. The entry point is proc_member() which is called with a TarInfo
1871 # object created from the header block from the current offset. The
1872 # proc_member() method can be overridden in a subclass to add custom
1873 # proc_*() methods. A proc_*() method MUST implement the following
1874 # operations:
1875 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1876 # if there is data that follows.
1877 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001878 # begin.
Georg Brandl38c6a222006-05-10 16:26:03 +00001879 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method matching its type.

    GNU longname/longlink headers go to proc_gnulong(), GNU sparse
    headers to proc_sparse(), everything else to proc_builtin().
    Returns whatever the selected method returns.
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1890
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type
       (which is handled like a regular file).

       Records where the member's data starts and, if the member
       carries data, advances self.offset past its data blocks.
    """
    tarinfo.offset_data = self.offset
    carries_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if carries_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001900
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname or longlink member.

    The long string is read from the data blocks that follow tarinfo.
    The header after those blocks is then processed, and the resulting
    TarInfo gets the long string as its name or linkname and tarinfo's
    offset as its own.  That TarInfo is returned.
    """
    # Collect the data blocks carrying the long string.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longstr = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longstr.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longstr.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001929
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

    Builds the list of data and hole sections of the sparse file,
    stores it in tarinfo.sparse, sets tarinfo.offset_data, advances
    self.offset past the stored data blocks and replaces tarinfo.size
    by the original (expanded) file size.  Returns tarinfo.

    Raises ValueError if an extended sparse header block is truncated,
    the same way TarInfo.frombuf() reports a truncated header.
    """
    sp = _ringbuffer()

    def add_structs(buf, pos, count, lastpos, realpos):
        # Parse up to `count` 24-byte (offset, numbytes) structs from
        # buf starting at pos and append the matching sections to sp.
        # Stops at the first struct that nti() rejects.  Returns the
        # updated (lastpos, realpos) pair.
        for _ in xrange(count):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        return lastpos, realpos

    # There are 4 possible sparse structs in the
    # first header.
    buf = tarinfo.buf
    lastpos, realpos = add_structs(buf, 386, 4, 0, 0)

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        if len(buf) != BLOCKSIZE:
            # A short read would otherwise surface as an IndexError
            # below, which the ValueError handling in next() misses.
            raise ValueError("truncated sparse header")
        self.offset += BLOCKSIZE
        lastpos, realpos = add_structs(buf, 0, 21, lastpos, realpos)
        isextended = ord(buf[504])

    # Whatever lies between the last section and the original size
    # is one trailing hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    # tarinfo.size is still the stored size here; it is used to skip
    # the data blocks before being replaced by the original size.
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001986
1987 #--------------------------------------------------------------------------
1988 # Little helper methods:
1989
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partially filled block still occupies a whole one.
        blocks += 1
    return blocks * BLOCKSIZE
1998
1999 def _getmember(self, name, tarinfo=None):
2000 """Find an archive member by name from bottom to top.
2001 If tarinfo is given, it is used as the starting point.
2002 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002003 # Ensure that all members have been loaded.
2004 members = self.getmembers()
2005
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002006 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002007 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002008 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002009 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002010
2011 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002012 if name == members[i].name:
2013 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002014
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002015 def _load(self):
2016 """Read through the entire archive file and look for readable
2017 members.
2018 """
2019 while True:
2020 tarinfo = self.next()
2021 if tarinfo is None:
2022 break
2023 self._loaded = True
2024
2025 def _check(self, mode=None):
2026 """Check if TarFile is still open, and if the operation's mode
2027 corresponds to TarFile's mode.
2028 """
2029 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002030 raise IOError("%s is closed" % self.__class__.__name__)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002031 if mode is not None and self._mode not in mode:
Georg Brandle4751e32006-05-18 06:11:19 +00002032 raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002033
2034 def __iter__(self):
2035 """Provide an iterator object.
2036 """
2037 if self._loaded:
2038 return iter(self.members)
2039 else:
2040 return TarIter(self)
2041
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002042 def _dbg(self, level, msg):
2043 """Write debugging output to sys.stderr.
2044 """
2045 if level <= self.debug:
2046 print >> sys.stderr, msg
2047# class TarFile
2048
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile, starting at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, or raise StopIteration.

        While the TarFile is not fully loaded, members come from its
        next() method; once that is exhausted the TarFile is flagged
        as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            # Everything is already in the member list, so continue
            # from our own position in it.
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2084
2085# Helper classes for sparse file support
2086class _section:
2087 """Base class for _data and _hole.
2088 """
2089 def __init__(self, offset, size):
2090 self.offset = offset
2091 self.size = size
2092 def __contains__(self, offset):
2093 return self.offset <= offset < self.offset + self.size
2094
class _data(_section):
    """A data section of a sparse file.  realpos is stored next to
       the offset and size kept by _section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2101
class _hole(_section):
    """A hole section of a sparse file; adds nothing to _section.
    """
2106
2107class _ringbuffer(list):
2108 """Ringbuffer class which increases performance
2109 over a regular list.
2110 """
2111 def __init__(self):
2112 self.idx = 0
2113 def find(self, offset):
2114 idx = self.idx
2115 while True:
2116 item = self[idx]
2117 if offset in item:
2118 break
2119 idx += 1
2120 if idx == len(self):
2121 idx = 0
2122 if idx == self.idx:
2123 # End of File
2124 return None
2125 self.idx = idx
2126 return item
2127
2128#---------------------------------------------
2129# zipfile compatible TarFile class
2130#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper that exposes a TarFile through the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzipped (TAR_GZIPPED)
           archive.  Any other compression value raises ValueError.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            # Give every member the attribute names ZipInfo uses.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of all members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing of the archive via TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive as arcname.  compress_type is
           accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, described by zinfo.

        zinfo's filename, file_size and date_time are copied to the
        name, size and mtime attributes before it is added.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
2178
2179#--------------------
2180# exported functions
2181#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       Only TarError counts as "not a tar archive"; any other
       exception raised while opening name is passed on.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2192
# Export TarFile.open as the module-level open().  Within this module the
# name `open` refers to TarFile.open from this point on, not the builtin.
open = TarFile.open