blob: 29e12f021674edb3d038cc89e588291942ce9d70 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Lars Gustäbelc64e4022007-03-13 10:47:19 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000053import re
Brett Cannon132fc542008-08-04 21:23:07 +000054import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055
if sys.platform == 'mac':
    # Classic MacOS is not supported.  This module assumes in many places
    # that replacing "/" with the local os.path.sep is enough to convert
    # a pathname, which does not hold for the mac rooted:path:name versus
    # :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
62
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000063try:
64 import grp, pwd
65except ImportError:
66 grp = pwd = None
67
68# from tarfile import *
69__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
70
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                              # the null character
BLOCKSIZE = 512                         # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20             # length of records

# Both magic strings fill the 8 bytes of the header's magic + version
# fields.  GNU tar writes "ustar" followed by TWO spaces and a NUL,
# POSIX writes "ustar", a NUL and the version "00".
GNU_MAGIC = "ustar  \0"                 # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"             # magic posix tar string

LENGTH_NAME = 100                       # maximum length of a filename
LENGTH_LINK = 100                       # maximum length of a linkname
LENGTH_PREFIX = 155                     # maximum length of the prefix field

# Values of the one-character type flag in a member header.
REGTYPE = "0"                           # regular file
AREGTYPE = "\0"                         # regular file
LNKTYPE = "1"                           # link (inside tarfile)
SYMTYPE = "2"                           # symbolic link
CHRTYPE = "3"                           # character special device
BLKTYPE = "4"                           # block special device
DIRTYPE = "5"                           # directory
FIFOTYPE = "6"                          # fifo special device
CONTTYPE = "7"                          # contiguous file

GNUTYPE_LONGNAME = "L"                  # GNU tar longname
GNUTYPE_LONGLINK = "K"                  # GNU tar longlink
GNUTYPE_SPARSE = "S"                    # GNU tar sparse file

XHDTYPE = "x"                           # POSIX.1-2001 extended header
XGLTYPE = "g"                           # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"                   # Solaris extended header

# Archive formats selectable through the `format` arguments.
USTAR_FORMAT = 0                        # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                          # GNU tar format
PAX_FORMAT = 2                          # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000106
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to the callable
# that converts its textual value.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
139
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special permission bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Owner / group / other permission bits.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
163
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Module-wide default encoding: the filesystem encoding when the
# interpreter reports one, otherwise the interpreter's default encoding.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000170
171#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000172# Some useful functions
173#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000174
def stn(s, length):
    """Return s as a string field of exactly `length` characters:
       cut off after `length` characters if it is longer, filled up
       with null characters if it is shorter.
    """
    return s[:length].ljust(length, "\0")
Georg Brandl38c6a222006-05-10 16:26:03 +0000179
def nts(s):
    """Return the part of a null-terminated string field that
       precedes the first null character (the whole field if it
       contains none).
    """
    head, _sep, _tail = s.partition("\0")
    return head
188
def nti(s):
    """Convert a number field of a tar header to a python number.

       Raises HeaderError if the field holds no valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] == "\200":
        # GNU extension: a leading 0200 byte marks a big-endian
        # base-256 number stored in the remaining bytes.
        n = long(0)
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # Plain encoding: null-terminated octal digits, an empty field
    # counts as zero.
    try:
        return int(nts(s) or "0", 8)
    except ValueError:
        raise HeaderError("invalid header")
205
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of `digits` bytes.

       Raises ValueError if the number does not fit into the field
       for the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, then reverse them to
    # get the big-endian order.
    octets = []
    for _ in range(width):
        octets.append(chr(n & 0xff))
        n >>= 8
    octets.reverse()
    return "\200" + "".join(octets)
232
def uts(s, encoding, errors):
    """Encode the unicode object s with `encoding` and return the
       resulting string.  `errors` is either a regular error handler
       name or the special value "utf-8".
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001. Replace untranslatable characters with their
    # UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # At least one character cannot be encoded: go character by
    # character and fall back to UTF-8 only where necessary.
    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
252
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

       All characters of the 512 byte header block are summed up,
       except for the 8 byte chksum field at offset 148 which is
       treated as if it was filled with spaces. According to the GNU
       tar sources, some tars (Sun and NeXT) calculate chksum with
       signed char, which will be different if there are chars in the
       buffer with the high bit set. So both variants are returned.
    """
    header = buf[:512]
    # "8x" skips the chksum field, the constant 256 accounts for the
    # eight spaces (8 * 32) it is assumed to contain.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
       Raises IOError if src ends before length bytes were read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly `count` bytes, a short read means that src
        # ended prematurely.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    done = 0
    while done < full_chunks:
        transfer(chunk_size)
        done += 1

    if tail != 0:
        transfer(tail)
    return
290
# Lookup table for filemode(): one inner tuple per character of the
# resulting string.  Each inner tuple lists (bits, character) pairs,
# the first pair whose bits are all set in the mode determines the
# character, so combined bit patterns come before their single bits.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner permissions
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group permissions
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # permissions of others
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000317
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def pick(group):
        # The first entry whose bits are all set in mode wins,
        # "-" stands for "none of them".
        for bit, char in group:
            if mode & bit == bit:
                return char
        return "-"

    return "".join([pick(group) for group in filemode_table])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000332
class TarError(Exception):
    """Base class of all exceptions defined by this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000351
352#---------------------------
353# internal stream interface
354#---------------------------
355class _LowLevelFile:
356 """Low-level file object. Supports reading and writing.
357 It is used instead of a regular file object for streaming
358 access.
359 """
360
361 def __init__(self, name, mode):
362 mode = {
363 "r": os.O_RDONLY,
364 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
365 }[mode]
366 if hasattr(os, "O_BINARY"):
367 mode |= os.O_BINARY
368 self.fd = os.open(name, mode)
369
370 def close(self):
371 os.close(self.fd)
372
373 def read(self, size):
374 return os.read(self.fd, size)
375
376 def write(self, s):
377 os.write(self.fd, s)
378
379class _Stream:
380 """Class that serves as an adapter between TarFile and
381 a stream-like object. The stream-like object only
382 needs to have a read() or write() method and is accessed
383 blockwise. Use of gzip or bzip2 compression is possible.
384 A stream-like object could be for example: sys.stdin,
385 sys.stdout, a socket, a tape device etc.
386
387 _Stream is intended to be used only internally.
388 """
389
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000391 """Construct a _Stream object.
392 """
393 self._extfileobj = True
394 if fileobj is None:
395 fileobj = _LowLevelFile(name, mode)
396 self._extfileobj = False
397
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000398 if comptype == '*':
399 # Enable transparent compression detection for the
400 # stream interface
401 fileobj = _StreamProxy(fileobj)
402 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000403
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000404 self.name = name or ""
405 self.mode = mode
406 self.comptype = comptype
407 self.fileobj = fileobj
408 self.bufsize = bufsize
409 self.buf = ""
410 self.pos = 0L
411 self.closed = False
412
413 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414 try:
415 import zlib
416 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000417 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000418 self.zlib = zlib
Gregory P. Smith88440962008-03-25 06:12:45 +0000419 self.crc = zlib.crc32("") & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000420 if mode == "r":
421 self._init_read_gz()
422 else:
423 self._init_write_gz()
424
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000425 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426 try:
427 import bz2
428 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000429 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000430 if mode == "r":
431 self.dbuf = ""
432 self.cmp = bz2.BZ2Decompressor()
433 else:
434 self.cmp = bz2.BZ2Compressor()
435
436 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000437 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000438 self.close()
439
440 def _init_write_gz(self):
441 """Initialize for writing with gzip compression.
442 """
443 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
444 -self.zlib.MAX_WBITS,
445 self.zlib.DEF_MEM_LEVEL,
446 0)
447 timestamp = struct.pack("<L", long(time.time()))
448 self.__write("\037\213\010\010%s\002\377" % timestamp)
449 if self.name.endswith(".gz"):
450 self.name = self.name[:-3]
451 self.__write(self.name + NUL)
452
453 def write(self, s):
454 """Write string s to the stream.
455 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000456 if self.comptype == "gz":
Gregory P. Smith88440962008-03-25 06:12:45 +0000457 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000458 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000459 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000460 s = self.cmp.compress(s)
461 self.__write(s)
462
463 def __write(self, s):
464 """Write string s to the stream if a whole new block
465 is ready to be written.
466 """
467 self.buf += s
468 while len(self.buf) > self.bufsize:
469 self.fileobj.write(self.buf[:self.bufsize])
470 self.buf = self.buf[self.bufsize:]
471
472 def close(self):
473 """Close the _Stream object. No operation should be
474 done on it afterwards.
475 """
476 if self.closed:
477 return
478
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000479 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000480 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000481
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000482 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000483 self.fileobj.write(self.buf)
484 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000485 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000486 # The native zlib crc is an unsigned 32-bit integer, but
487 # the Python wrapper implicitly casts that to a signed C
488 # long. So, on a 32-bit box self.crc may "look negative",
489 # while the same crc on a 64-bit box may "look positive".
490 # To avoid irksome warnings from the `struct` module, force
491 # it to look positive on all boxes.
492 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000493 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000494
495 if not self._extfileobj:
496 self.fileobj.close()
497
498 self.closed = True
499
500 def _init_read_gz(self):
501 """Initialize for reading a gzip compressed fileobj.
502 """
503 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
504 self.dbuf = ""
505
506 # taken from gzip.GzipFile with some alterations
507 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000508 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000509 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000510 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000511
512 flag = ord(self.__read(1))
513 self.__read(6)
514
515 if flag & 4:
516 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
517 self.read(xlen)
518 if flag & 8:
519 while True:
520 s = self.__read(1)
521 if not s or s == NUL:
522 break
523 if flag & 16:
524 while True:
525 s = self.__read(1)
526 if not s or s == NUL:
527 break
528 if flag & 2:
529 self.__read(2)
530
531 def tell(self):
532 """Return the stream's file pointer position.
533 """
534 return self.pos
535
536 def seek(self, pos=0):
537 """Set the stream's file pointer to pos. Negative seeking
538 is forbidden.
539 """
540 if pos - self.pos >= 0:
541 blocks, remainder = divmod(pos - self.pos, self.bufsize)
542 for i in xrange(blocks):
543 self.read(self.bufsize)
544 self.read(remainder)
545 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000546 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000547 return self.pos
548
549 def read(self, size=None):
550 """Return the next size number of bytes from the stream.
551 If size is not defined, return all bytes of the stream
552 up to EOF.
553 """
554 if size is None:
555 t = []
556 while True:
557 buf = self._read(self.bufsize)
558 if not buf:
559 break
560 t.append(buf)
561 buf = "".join(t)
562 else:
563 buf = self._read(size)
564 self.pos += len(buf)
565 return buf
566
567 def _read(self, size):
568 """Return size bytes from the stream.
569 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000570 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000571 return self.__read(size)
572
573 c = len(self.dbuf)
574 t = [self.dbuf]
575 while c < size:
576 buf = self.__read(self.bufsize)
577 if not buf:
578 break
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000579 try:
580 buf = self.cmp.decompress(buf)
581 except IOError:
582 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000583 t.append(buf)
584 c += len(buf)
585 t = "".join(t)
586 self.dbuf = t[size:]
587 return t[:size]
588
589 def __read(self, size):
590 """Return size bytes from stream. If internal buffer is empty,
591 read another block from the stream.
592 """
593 c = len(self.buf)
594 t = [self.buf]
595 while c < size:
596 buf = self.fileobj.read(self.bufsize)
597 if not buf:
598 break
599 t.append(buf)
600 c += len(buf)
601 t = "".join(t)
602 self.buf = t[size:]
603 return t[:size]
604# class _Stream
605
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000606class _StreamProxy(object):
607 """Small proxy class that enables transparent compression
608 detection for the Stream interface (mode 'r|*').
609 """
610
611 def __init__(self, fileobj):
612 self.fileobj = fileobj
613 self.buf = self.fileobj.read(BLOCKSIZE)
614
615 def read(self, size):
616 self.read = self.fileobj.read
617 return self.buf
618
619 def getcomptype(self):
620 if self.buf.startswith("\037\213\010"):
621 return "gz"
622 if self.buf.startswith("BZh91"):
623 return "bz2"
624 return "tar"
625
626 def close(self):
627 self.fileobj.close()
628# class StreamProxy
629
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000630class _BZ2Proxy(object):
631 """Small proxy class that enables external file object
632 support for "r:bz2" and "w:bz2" modes. This is actually
633 a workaround for a limitation in bz2 module's BZ2File
634 class which (unlike gzip.GzipFile) has no support for
635 a file object argument.
636 """
637
638 blocksize = 16 * 1024
639
640 def __init__(self, fileobj, mode):
641 self.fileobj = fileobj
642 self.mode = mode
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000643 self.name = getattr(self.fileobj, "name", None)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000644 self.init()
645
646 def init(self):
647 import bz2
648 self.pos = 0
649 if self.mode == "r":
650 self.bz2obj = bz2.BZ2Decompressor()
651 self.fileobj.seek(0)
652 self.buf = ""
653 else:
654 self.bz2obj = bz2.BZ2Compressor()
655
656 def read(self, size):
657 b = [self.buf]
658 x = len(self.buf)
659 while x < size:
Lars Gustäbel2020a592009-03-22 20:09:33 +0000660 raw = self.fileobj.read(self.blocksize)
661 if not raw:
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000662 break
Lars Gustäbel2020a592009-03-22 20:09:33 +0000663 data = self.bz2obj.decompress(raw)
664 b.append(data)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000665 x += len(data)
666 self.buf = "".join(b)
667
668 buf = self.buf[:size]
669 self.buf = self.buf[size:]
670 self.pos += len(buf)
671 return buf
672
673 def seek(self, pos):
674 if pos < self.pos:
675 self.init()
676 self.read(pos - self.pos)
677
678 def tell(self):
679 return self.pos
680
681 def write(self, data):
682 self.pos += len(data)
683 raw = self.bz2obj.compress(data)
684 self.fileobj.write(raw)
685
686 def close(self):
687 if self.mode == "w":
688 raw = self.bz2obj.flush()
689 self.fileobj.write(raw)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000690# class _BZ2Proxy
691
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000692#------------------------
693# Extraction file object
694#------------------------
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000695class _FileInFile(object):
696 """A thin wrapper around an existing file object that
697 provides a part of its data as an individual file
698 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000699 """
700
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000701 def __init__(self, fileobj, offset, size, sparse=None):
702 self.fileobj = fileobj
703 self.offset = offset
704 self.size = size
705 self.sparse = sparse
706 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000708 def tell(self):
709 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000711 return self.position
712
713 def seek(self, position):
714 """Seek to a position in the file.
715 """
716 self.position = position
717
718 def read(self, size=None):
719 """Read data from the file.
720 """
721 if size is None:
722 size = self.size - self.position
723 else:
724 size = min(size, self.size - self.position)
725
726 if self.sparse is None:
727 return self.readnormal(size)
728 else:
729 return self.readsparse(size)
730
731 def readnormal(self, size):
732 """Read operation for regular files.
733 """
734 self.fileobj.seek(self.offset + self.position)
735 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000736 return self.fileobj.read(size)
737
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000738 def readsparse(self, size):
739 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000740 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000741 data = []
742 while size > 0:
743 buf = self.readsparsesection(size)
744 if not buf:
745 break
746 size -= len(buf)
747 data.append(buf)
748 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000749
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000750 def readsparsesection(self, size):
751 """Read a single section of a sparse file.
752 """
753 section = self.sparse.find(self.position)
754
755 if section is None:
756 return ""
757
758 size = min(size, section.offset + section.size - self.position)
759
760 if isinstance(section, _data):
761 realpos = section.realpos + self.position - section.offset
762 self.fileobj.seek(self.offset + realpos)
763 self.position += size
764 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000765 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000766 self.position += size
767 return NUL * size
768#class _FileInFile
769
770
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() requests at once while it looks
    # for the end of a line.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member tarinfo of the
           archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data that readline() read ahead and did not return yet.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Data read ahead by readline() goes first.
        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Read ahead until a block with a newline or EOF shows up.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Every negative size (not only -1) and None mean "no limit".
        # Applying min() to them would slice the buffer from its end.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position
           is limited to the range from 0 to the size of the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Data that was read ahead does not match the new position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
898
899#------------------
900# Exported Classes
901#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked to its lower 12 bits, a directory name
           gets a trailing slash, and unicode values of the string
           fields are encoded with `encoding' and `errors'.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        for key in ("name", "linkname", "uname", "gname"):
            if type(info[key]) is unicode:
                info[key] = info[key].encode(encoding, errors)

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

           `format' selects the header flavour and must be one of
           USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT, otherwise ValueError
           is raised.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname does not fit into its
           field or the name cannot be split into prefix and name.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT)

    def create_gnu_header(self, info):
        """Return the object as a GNU header block sequence.

           Over-long link names and names are stored in extra
           GNUTYPE_LONGLINK / GNUTYPE_LONGNAME members that precede
           the regular header.
        """
        info["magic"] = GNU_MAGIC

        buf = ""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)

        return buf + self._create_header(info, GNU_FORMAT)

    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that self.pax_headers is left untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                pax_headers[hname] = val
                continue

            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = unicode(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" inside the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if there
           is no such slash or the remaining name is still too long.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field: an 8 byte blank placeholder at offset
            # 148, so that the type flag lands at offset 156 (the
            # offsets frombuf() reads). It is overwritten below.
            " " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", "root"), 32),
            stn(info.get("gname", "root"), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        # struct.pack pads the joined fields with NULs up to BLOCKSIZE.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Splice the checksum (6 octal digits + NUL) into bytes
        # 148..154; byte 155 keeps the trailing blank.
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name += NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be unicode objects.
        """
        records = []
        for keyword, value in pax_headers.iteritems():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length
            # itself, so iterate until the value stops changing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append("%d %s=%s\n" % (p, keyword, value))
        records = "".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises HeaderError if the buffer is not exactly one block
           long, consists only of NUL bytes or has a bad checksum.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        obj = cls()
        # Keep the raw block; _proc_sparse() parses it further.
        obj.buf = buf
        obj.name = nts(buf[0:100])
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257])
        obj.uname = nts(buf[265:297])
        obj.gname = nts(buf[297:329])
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500])

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile. Return None if no more data can be read.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        if not buf:
            return
        obj = cls.frombuf(buf)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises HeaderError if no header follows the long name data.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        next = self.fromtarfile(tarfile)
        if next is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

           Builds self.sparse, a sequence of hole and data sections,
           and replaces self.size by the original file size stored in
           the header.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        lastpos = 0
        realpos = 0
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the stored (compacted) size here.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.

           Raises HeaderError if a record announces a length of zero
           or if an extended header is not followed by another header.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would never advance pos and the same
                # record would be matched forever.
                raise HeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        next = self.fromtarfile(tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            if next is None:
                raise HeaderError("missing subsequent header")

            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.

           Only keywords listed in PAX_FIELDS are applied. Number
           fields that cannot be converted are set to 0.
        """
        for keyword, value in pax_headers.iteritems():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    # Type predicates: each one compares self.type with the
    # corresponding type constant(s).
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1463
1464class TarFile(object):
1465 """The TarFile Class provides an interface to tar archives.
1466 """
1467
1468 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1469
1470 dereference = False # If true, add content of linked file to the
1471 # tar file, else the link.
1472
1473 ignore_zeros = False # If true, skips empty or invalid blocks and
1474 # continues processing.
1475
1476 errorlevel = 0 # If 0, fatal errors only appear in debug
1477 # messages (if debug >= 0). If > 0, errors
1478 # are passed to the caller as exceptions.
1479
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001480 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001481
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001482 encoding = ENCODING # Encoding for 8-bit character strings.
1483
1484 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001485
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001486 tarinfo = TarInfo # The default TarInfo class to use.
1487
1488 fileobject = ExFileObject # The default ExFileObject class to use.
1489
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining arguments override the class-level defaults of the
       same name when they are not None. `pax_headers' is only used if
       the resulting format is PAX_FORMAT. Raises ValueError if `mode'
       is not one of 'r', 'a' or 'w'.
    """
    # Test the length explicitly: the empty string is a substring of
    # "raw" and would otherwise slip through to the lookup below.
    if len(mode) != 1 or mode not in "raw":
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding

    # The default error handler depends on the requested mode
    # argument (not on self.mode, which may have become "w" above).
    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "utf-8"
    else:
        self.errors = "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                if self.next() is None:
                    if self.offset > 0:
                        self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately catch everything: release a file we opened
        # ourselves, then let the original exception propagate.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001587
def _getposix(self):
    """Return True if the archive format is USTAR_FORMAT."""
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Deprecated setter: a true value selects USTAR_FORMAT, a false
       value GNU_FORMAT. Always issues a DeprecationWarning.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT

# Legacy boolean view on the format attribute.
posix = property(_getposix, _setposix)
1599
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001600 #--------------------------------------------------------------------------
1601 # Below are the classmethods which act as alternate constructors to the
1602 # TarFile class. The open() method is the only one that is needed for
1603 # public use; it is the "super"-constructor and is able to select an
1604 # adequate "sub"-constructor for a particular compression using the mapping
1605 # from OPEN_METH.
1606 #
1607 # This concept allows one to subclass TarFile without losing the comfort of
1608 # the super-constructor. A sub-constructor is registered and made available
1609 # by adding it to the mapping in OPEN_METH.
1610
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or
       the mode cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no opener accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact match: a substring test against "rw" would let the
        # string "rw" through and a _Stream would be created for it.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        # The _Stream was created here, so the TarFile owns it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        # Exact match: a substring test against "aw" would also
        # accept the empty string.
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001683
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. `name', `mode', `fileobj' and all keyword arguments are
       handed to the class constructor, whose result is returned.
    """
    # Compare against a tuple instead of doing a substring test on "raw":
    # the empty string is a substring of every string and would pass.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001691
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r' or 'w', otherwise ValueError is raised.
       CompressionError is raised if the gzip module cannot be used and
       ReadError if opening the archive fails with an IOError. If
       `fileobj' is None, the file `name' is opened here; it is closed
       again if opening the archive fails.
    """
    # Compare against a tuple: a substring test on "rw" would also
    # accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        try:
            t = cls.taropen(name, mode,
                            gzip.GzipFile(name, mode, compresslevel, fileobj),
                            **kwargs)
        except IOError:
            raise ReadError("not a gzip file")
    except:
        # Whatever went wrong, do not leak the file that was opened
        # above. A file object passed in by the caller is left alone.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1717
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r' or 'w', otherwise ValueError is raised.
       CompressionError is raised if the bz2 module is missing and
       ReadError if opening the archive fails with an IOError or an
       EOFError. If `fileobj' is None, a BZ2File for `name' is opened
       here; it is closed again if opening the archive fails.
    """
    # Compare against a tuple: a substring test on "rw" would also
    # accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (IOError, EOFError):
            # bz2 reports a truncated stream with EOFError; treat it
            # like any other unreadable bzip2 data.
            raise ReadError("not a bzip2 file")
    except:
        # Do not leak the BZ2File that was opened above. A file object
        # passed in by the caller is left alone.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1742
# Registry of the *open() methods: maps a compression type name
# to the name of the classmethod that opens such an archive.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
}
1749
1750 #--------------------------------------------------------------------------
1751 # The public methods which TarFile provides:
1752
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Closing an already closed TarFile does nothing. The file object
       is closed unless `_extfileobj' is set. This also happens when
       writing the end-of-archive padding fails; the error is still
       raised, but the TarFile is marked as closed.
    """
    if self.closed:
        return

    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file even if the padding could not be written,
        # otherwise a failed close() leaks the open file.
        try:
            if not self._extfileobj:
                self.fileobj.close()
        finally:
            self.closed = True
1772
def getmember(self, name):
    """Return the TarInfo object of the member called `name'. KeyError
       is raised if the archive has no such member. If a name occurs
       more than once in the archive, the last occurrence is taken to
       be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001783
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in
       the order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The full list is only known after the whole archive
    # has been scanned once.
    self._load()
    return self.members
1793
def getnames(self):
    """Return the names of the archive's members as a list, in the
       same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001799
1800 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1801 """Create a TarInfo object for either the file `name' or the file
1802 object `fileobj' (using os.fstat on its file descriptor). You can
1803 modify some of the TarInfo's attributes before you add it using
1804 addfile(). If given, `arcname' specifies an alternative name for the
1805 file in the archive.
1806 """
1807 self._check("aw")
1808
1809 # When fileobj is given, replace name by
1810 # fileobj's real name.
1811 if fileobj is not None:
1812 name = fileobj.name
1813
1814 # Building the name of the member in the archive.
1815 # Backward slashes are converted to forward slashes,
1816 # Absolute paths are turned to relative paths.
1817 if arcname is None:
1818 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001819 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001820 arcname = arcname.replace(os.sep, "/")
1821 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001822
1823 # Now, fill the TarInfo object with
1824 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001825 tarinfo = self.tarinfo()
1826 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001827
1828 # Use os.stat or os.lstat, depending on platform
1829 # and if symlinks shall be resolved.
1830 if fileobj is None:
1831 if hasattr(os, "lstat") and not self.dereference:
1832 statres = os.lstat(name)
1833 else:
1834 statres = os.stat(name)
1835 else:
1836 statres = os.fstat(fileobj.fileno())
1837 linkname = ""
1838
1839 stmd = statres.st_mode
1840 if stat.S_ISREG(stmd):
1841 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001842 if not self.dereference and statres.st_nlink > 1 and \
1843 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001844 # Is it a hardlink to an already
1845 # archived file?
1846 type = LNKTYPE
1847 linkname = self.inodes[inode]
1848 else:
1849 # The inode is added only if its valid.
1850 # For win32 it is always 0.
1851 type = REGTYPE
1852 if inode[0]:
1853 self.inodes[inode] = arcname
1854 elif stat.S_ISDIR(stmd):
1855 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001856 elif stat.S_ISFIFO(stmd):
1857 type = FIFOTYPE
1858 elif stat.S_ISLNK(stmd):
1859 type = SYMTYPE
1860 linkname = os.readlink(name)
1861 elif stat.S_ISCHR(stmd):
1862 type = CHRTYPE
1863 elif stat.S_ISBLK(stmd):
1864 type = BLKTYPE
1865 else:
1866 return None
1867
1868 # Fill the TarInfo object with all
1869 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001870 tarinfo.name = arcname
1871 tarinfo.mode = stmd
1872 tarinfo.uid = statres.st_uid
1873 tarinfo.gid = statres.st_gid
1874 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001875 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001876 else:
1877 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001878 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001879 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001880 tarinfo.linkname = linkname
1881 if pwd:
1882 try:
1883 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1884 except KeyError:
1885 pass
1886 if grp:
1887 try:
1888 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1889 except KeyError:
1890 pass
1891
1892 if type in (CHRTYPE, BLKTYPE):
1893 if hasattr(os, "major") and hasattr(os, "minor"):
1894 tarinfo.devmajor = os.major(statres.st_rdev)
1895 tarinfo.devminor = os.minor(statres.st_rdev)
1896 return tarinfo
1897
1898 def list(self, verbose=True):
1899 """Print a table of contents to sys.stdout. If `verbose' is False, only
1900 the names of the members are printed. If it is True, an `ls -l'-like
1901 output is produced.
1902 """
1903 self._check()
1904
1905 for tarinfo in self:
1906 if verbose:
1907 print filemode(tarinfo.mode),
1908 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1909 tarinfo.gname or tarinfo.gid),
1910 if tarinfo.ischr() or tarinfo.isblk():
1911 print "%10s" % ("%d,%d" \
1912 % (tarinfo.devmajor, tarinfo.devminor)),
1913 else:
1914 print "%10d" % tarinfo.size,
1915 print "%d-%02d-%02d %02d:%02d:%02d" \
1916 % time.localtime(tarinfo.mtime)[:6],
1917
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001918 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001919
1920 if verbose:
1921 if tarinfo.issym():
1922 print "->", tarinfo.linkname,
1923 if tarinfo.islnk():
1924 print "link to", tarinfo.linkname,
1925 print
1926
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded; it is deprecated in
       favour of `filter'. `filter' is a function that expects a TarInfo
       object argument and returns the changed TarInfo object, if it
       returns None the TarInfo object will be excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                      DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing to the
            # archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
1988
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the
       archive as well. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, the caller's object is not stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Append the member's data, padded with NULs to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002014
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory, or to the directory `path' if given. Owner,
       modification time and permissions of directories are set after
       everything has been extracted. `members' is optional and must
       be a subset of the list returned by getmembers().
    """
    if members is None:
        members = self

    directories = []
    for member in members:
        if member.isdir():
            # Remember the real attributes, but extract the
            # directory itself with a safe mode.
            directories.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        self.extract(member, path)

    # Walk the directories in reverse name order.
    by_name = sorted(directories, key=operator.attrgetter('name'))
    for dirinfo in reversed(by_name):
        dirpath = os.path.join(path, dirinfo.name)
        # Set correct owner, mtime and filemode.
        try:
            self.chown(dirinfo, dirpath)
            self.utime(dirinfo, dirpath)
            self.chmod(dirinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2051
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name, or below the directory `path' if given. Its
       file information is extracted as accurately as possible. `member'
       may be a filename or a TarInfo object. Depending on `errorlevel',
       errors are either raised or only reported as debug messages.
    """
    self._check("r")

    tarinfo = member
    if isinstance(member, basestring):
        tarinfo = self.getmember(member)

    # makelink() needs to know where the target of a hard link is.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2084
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target; KeyError is raised
       if the target is not found in the archive and StreamError if the
       archive is read from a stream. If `member' is none of the above,
       None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, basestring):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    if tarinfo.isreg():
        return self.fileobject(self, tarinfo)

    elif tarinfo.type not in SUPPORTED_TYPES:
        # If a member's type is unknown, it is treated as a
        # regular file.
        return self.fileobject(self, tarinfo)

    elif tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")

        # A (sym)link's file object is its target's file object. The
        # target is searched among the members preceding the link.
        target = self._getmember(tarinfo.linkname, tarinfo)
        if target is None:
            # Fail with a meaningful error instead of recursing with
            # None and dying with an AttributeError.
            raise KeyError("linkname %r not found" % tarinfo.linkname)
        return self.extractfile(target)
    else:
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None
2123
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.
    """
    # Build the destination pathname: no trailing slash, and
    # forward slashes replaced by the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create all upper directories. They are not part of the archive
    # and get default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that fits the member's type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not set for symbolic links.
    if tarinfo.issym():
        return
    self.chmod(tarinfo, targetpath)
    self.utime(tarinfo, targetpath)
2165
2166 #--------------------------------------------------------------------------
2167 # Below are the different file methods. They are called via
2168 # _extract_member() when extract() is called. They can be replaced in a
2169 # subclass to implement other functionality.
2170
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath. A directory that exists
       already is not an error.
    """
    try:
        # The directory gets a safe mode here, the real mode
        # is set later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2181
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath from the data of tarinfo.

       Both the member's file object and the target file are closed
       again even if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2190
def makeunknown(self, tarinfo, targetpath):
    """Make a file at targetpath from a TarInfo object whose
       type is unknown: it is handled like a regular file.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, extracted as regular file."
               % tarinfo.type)
    self._dbg(1, message)
2198
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised on
       platforms without os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002206
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.
       ExtractError is raised on platforms that lack os.mknod
       or os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode with the file type bits
    # of the kind of device.
    mode = tarinfo.mode
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    mode = mode | devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2221
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If the link cannot
       be created (platform limitation), a copy of the referenced
       file is made instead. IOError is raised if that fails, too.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # _link_target has been prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        # No link could be made, fall back to copying
        # the referenced file below.
        pass

    # A symbolic link's target is taken relative to the
    # directory the link is in.
    if tarinfo.issym():
        linkpath = os.path.dirname(tarinfo.name) + "/" + \
                   tarinfo.linkname
    else:
        linkpath = tarinfo.linkname

    # First try the member of that name from the archive,
    # then a file of that name in the file system.
    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        try:
            shutil.copy2(linkpath.replace("/", os.sep), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002248
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo. Nothing is
       done unless the effective user id is 0.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name first, then by id; as a last
    # resort the group of the current process is used.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # The same for the user.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002276
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.
       ExtractError is raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002285
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.
       ExtractError is raised if the time cannot be changed.
    """
    if hasattr(os, 'utime'):
        try:
            # The access time is set to the modification time as well.
            stamps = (tarinfo.mtime, tarinfo.mtime)
            os.utime(targetpath, stamps)
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002295
2296 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available. ReadError is raised if not even the first header of
       the archive is valid.
    """
    self._check("ra")

    # A member that has been read ahead is handed out first.
    if self.firstmember is not None:
        member, self.firstmember = self.firstmember, None
        return member

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            member = self.tarinfo.fromtarfile(self)
        except HeaderError as e:
            if not self.ignore_zeros:
                # An invalid header ends the archive, unless it is
                # the very first one.
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
            # Skip the invalid block and try the one after it.
            self._dbg(2, "0x%X: %s" % (self.offset, e))
            self.offset += BLOCKSIZE
            continue

        if member is None:
            return None
        self.members.append(member)
        return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002329
2330 #--------------------------------------------------------------------------
2331 # Little helper methods:
2332
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name, searching from bottom to top.
       If tarinfo is given, only the members in front of it are
       searched. None is returned if there is no match.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is not None:
        members = members[:members.index(tarinfo)]

    for candidate in reversed(members):
        if name == candidate.name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002348
def _load(self):
    """Read through the entire archive file, calling next() until
       no more member is returned, and mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2358
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed already or if its
       mode is not one of the characters in `mode' (if given).
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002367
def __iter__(self):
    """Provide an iterator object: a plain iterator over the member
       list once the archive is loaded, a TarIter otherwise.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2375
def _dbg(self, level, msg):
    """Write the debugging message `msg' to sys.stderr, provided
       that `level' does not exceed the TarFile's debug level.
    """
    if self.debug >= level:
        print >> sys.stderr, msg
2381# class TarFile
2382
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...

       Every TarIter starts at the first member of the archive, no
       matter how many members have been read from it before.
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        # Number of members this iterator has returned so far.
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def next(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        tarfile = self.tarfile
        if self.index == 0 and tarfile.firstmember is not None:
            # Let TarFile.next() hand out (and reset) the member it has
            # read ahead, otherwise it would be returned a second time
            # by the next call of TarFile.next().
            tarinfo = tarfile.next()
        elif self.index < len(tarfile.members):
            # Members that were read before are taken from the list.
            # This serves iterators created after a part of the archive
            # was read, and also covers SF #1100429 (getmembers() being
            # called during iteration).
            tarinfo = tarfile.members[self.index]
        elif not tarfile._loaded:
            tarinfo = tarfile.next()
            if not tarinfo:
                tarfile._loaded = True
                raise StopIteration
        else:
            raise StopIteration
        self.index += 1
        return tarinfo
2418
2419# Helper classes for sparse file support
2420class _section:
2421 """Base class for _data and _hole.
2422 """
2423 def __init__(self, offset, size):
2424 self.offset = offset
2425 self.size = size
2426 def __contains__(self, offset):
2427 return self.offset <= offset < self.offset + self.size
2428
class _data(_section):
    """A section of a sparse file that holds actual data.

       In addition to the offset and size kept by _section, the
       `realpos' value passed to the constructor is stored on the
       instance.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2435
class _hole(_section):
    """A hole section of a sparse file.

       Adds nothing to _section; presumably it exists so that holes
       can be told apart from _data sections by their type.
    """
2440
2441class _ringbuffer(list):
2442 """Ringbuffer class which increases performance
2443 over a regular list.
2444 """
2445 def __init__(self):
2446 self.idx = 0
2447 def find(self, offset):
2448 idx = self.idx
2449 while True:
2450 item = self[idx]
2451 if offset in item:
2452 break
2453 idx += 1
2454 if idx == len(self):
2455 idx = 0
2456 if idx == self.idx:
2457 # End of File
2458 return None
2459 self.idx = idx
2460 return item
2461
2462#---------------------------------------------
2463# zipfile compatible TarFile class
2464#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper that gives a TarFile the interface of the standard
       module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' as a plain (TAR_PLAIN) or gzip-compressed
           (TAR_GZIPPED) archive.  Any other `compression' value
           raises ValueError.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            # Give every member the attribute names that users of
            # zipfile's ZipInfo objects expect.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members reported by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing of the archive using TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None; present for interface
           compatibility only.
        """
        return

    def getinfo(self, name):
        """Return the member called `name'.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called `name'.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive, optionally under
           the name `arcname'.  `compress_type' is accepted but ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member, taking the name and the
           modification time from `zinfo' (its filename and date_time
           attributes).
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2514#class TarFileCompat
2515
2516#--------------------
2517# exported functions
2518#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The check is done by opening and immediately closing the
       archive with this module's open(); only TarError is treated
       as "not a tar archive", any other exception propagates.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2529
# Keep a reference to whatever `open' is bound to at this point before
# the name is rebound below, so it remains reachable as bltn_open.
bltn_open = open
# From here on, the module-level name `open' refers to TarFile.open.
open = TarFile.open