blob: 54a26062ed902468c4f70359414429aa426a0a97 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Lars Gustäbelc64e4022007-03-13 10:47:19 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Georg Brandl3354f282006-10-29 09:16:12 +000052import copy
Lars Gustäbelc64e4022007-03-13 10:47:19 +000053import re
Brett Cannon132fc542008-08-04 21:23:07 +000054import operator
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError("tarfile does not work for platform==mac")
62
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000063try:
64 import grp, pwd
65except ImportError:
66 grp = pwd = None
67
68# from tarfile import *
69__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
70
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings are exactly 8 bytes long (magic + version fields of
# the header). The GNU variant is "ustar" followed by TWO spaces and a NUL.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000106
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings. Maps the field name to its converter.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
139
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# The 0o prefix is accepted by Python 2.6+ and 3.x alike.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

TSUID   = 0o4000          # set UID on execution
TSGID   = 0o2000          # set GID on execution
TSVTX   = 0o1000          # reserved

TUREAD  = 0o400           # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC  = 0o100           # execute/search by owner
TGREAD  = 0o040           # read by group
TGWRITE = 0o020           # write by group
TGEXEC  = 0o010           # execute/search by group
TOREAD  = 0o004           # read by other
TOWRITE = 0o002           # write by other
TOEXEC  = 0o001           # execute/search by other
163
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Default character encoding: the filesystem encoding when the
# interpreter reports one, otherwise the interpreter's default encoding.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000170
171#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000172# Some useful functions
173#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000174
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

       The result is always exactly `length` characters long: `s` is
       truncated if it is longer and padded with null characters if it
       is shorter.
    """
    return s[:length].ljust(length, "\0")
Georg Brandl38c6a222006-05-10 16:26:03 +0000179
def nts(s):
    """Convert a null-terminated string field to a python string.

       Everything from the first null character on is dropped; a string
       without a null character is returned unchanged.
    """
    # Use the string up to the first null char.
    return s.partition("\0")[0]
188
def nti(s):
    """Convert a number field to a python number.

       Raises HeaderError if the field uses the octal encoding but does
       not contain a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] != chr(0x80):
        # Octal digits terminated by a null byte; an empty field means 0.
        try:
            n = int(nts(s) or "0", 8)
        except ValueError:
            raise HeaderError("invalid header")
    else:
        # GNU encoding: marker byte 0x80 (octal 0200) followed by a
        # big-endian binary number.
        n = 0
        for c in s[1:]:
            n = (n << 8) + ord(c)
    return n
205
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

       Returns a string of `digits` characters. Raises ValueError if the
       number does not fit into the octal form and either `format` is
       not GNU_FORMAT or the number is too large for the binary form.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** (digits - 1):
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the payload low-order byte first, add the 0200 (0x80)
    # marker, then reverse so the marker leads a big-endian number.
    octets = []
    for _ in range(digits - 1):
        octets.append(chr(n & 0xff))
        n >>= 8
    octets.append(chr(0x80))
    octets.reverse()
    return "".join(octets)
232
def uts(s, encoding, errors):
    """Convert a unicode object to a string.

       `errors` is passed on to the codec as the error handler, except
       for the special value "utf-8" which is handled here.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001. Replace untranslatable characters with their
    # UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        # Fall back to encoding character by character so that only
        # the untranslatable ones are stored as UTF-8.
        pieces = []
        for c in s:
            try:
                pieces.append(c.encode(encoding, "strict"))
            except UnicodeEncodeError:
                pieces.append(c.encode("utf8"))
        return "".join(pieces)
252
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # The chksum field occupies bytes 148..155; its eight spaces
    # contribute 8 * 32 = 256 to either sum.
    head = buf[:148]
    tail = buf[156:512]
    unsigned_chksum = 256 + sum(struct.unpack("148B", head) +
                                struct.unpack("356B", tail))
    signed_chksum = 256 + sum(struct.unpack("148b", head) +
                              struct.unpack("356b", tail))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src is exhausted before length bytes have
       been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    remaining = length
    # Copy in full BUFSIZE chunks followed by one shorter chunk for
    # whatever is left over.
    while remaining > 0:
        chunk = min(BUFSIZE, remaining)
        buf = src.read(chunk)
        if len(buf) < chunk:
            raise IOError("end of file reached")
        dst.write(buf)
        remaining -= chunk
    return
290
# Lookup table for filemode(): one inner tuple per character of the
# result string. Each inner tuple lists (bits, character) pairs that are
# tried in order; the first pair whose bits are all set wins.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000317
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for table in filemode_table:
        for bit, char in table:
            # All bits of the entry have to be set for it to match.
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            # No entry of this position matched.
            perm.append("-")
    return "".join(perm)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000332
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Exception for invalid headers."""
    pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000351
352#---------------------------
353# internal stream interface
354#---------------------------
355class _LowLevelFile:
356 """Low-level file object. Supports reading and writing.
357 It is used instead of a regular file object for streaming
358 access.
359 """
360
361 def __init__(self, name, mode):
362 mode = {
363 "r": os.O_RDONLY,
364 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
365 }[mode]
366 if hasattr(os, "O_BINARY"):
367 mode |= os.O_BINARY
368 self.fd = os.open(name, mode)
369
370 def close(self):
371 os.close(self.fd)
372
373 def read(self, size):
374 return os.read(self.fd, size)
375
376 def write(self, s):
377 os.write(self.fd, s)
378
379class _Stream:
380 """Class that serves as an adapter between TarFile and
381 a stream-like object. The stream-like object only
382 needs to have a read() or write() method and is accessed
383 blockwise. Use of gzip or bzip2 compression is possible.
384 A stream-like object could be for example: sys.stdin,
385 sys.stdout, a socket, a tape device etc.
386
387 _Stream is intended to be used only internally.
388 """
389
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000391 """Construct a _Stream object.
392 """
393 self._extfileobj = True
394 if fileobj is None:
395 fileobj = _LowLevelFile(name, mode)
396 self._extfileobj = False
397
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000398 if comptype == '*':
399 # Enable transparent compression detection for the
400 # stream interface
401 fileobj = _StreamProxy(fileobj)
402 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000403
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000404 self.name = name or ""
405 self.mode = mode
406 self.comptype = comptype
407 self.fileobj = fileobj
408 self.bufsize = bufsize
409 self.buf = ""
410 self.pos = 0L
411 self.closed = False
412
413 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414 try:
415 import zlib
416 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000417 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000418 self.zlib = zlib
Gregory P. Smith88440962008-03-25 06:12:45 +0000419 self.crc = zlib.crc32("") & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000420 if mode == "r":
421 self._init_read_gz()
422 else:
423 self._init_write_gz()
424
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000425 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426 try:
427 import bz2
428 except ImportError:
Georg Brandle4751e32006-05-18 06:11:19 +0000429 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000430 if mode == "r":
431 self.dbuf = ""
432 self.cmp = bz2.BZ2Decompressor()
433 else:
434 self.cmp = bz2.BZ2Compressor()
435
436 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000437 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000438 self.close()
439
440 def _init_write_gz(self):
441 """Initialize for writing with gzip compression.
442 """
443 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
444 -self.zlib.MAX_WBITS,
445 self.zlib.DEF_MEM_LEVEL,
446 0)
447 timestamp = struct.pack("<L", long(time.time()))
448 self.__write("\037\213\010\010%s\002\377" % timestamp)
449 if self.name.endswith(".gz"):
450 self.name = self.name[:-3]
451 self.__write(self.name + NUL)
452
453 def write(self, s):
454 """Write string s to the stream.
455 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000456 if self.comptype == "gz":
Gregory P. Smith88440962008-03-25 06:12:45 +0000457 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000458 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000459 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000460 s = self.cmp.compress(s)
461 self.__write(s)
462
463 def __write(self, s):
464 """Write string s to the stream if a whole new block
465 is ready to be written.
466 """
467 self.buf += s
468 while len(self.buf) > self.bufsize:
469 self.fileobj.write(self.buf[:self.bufsize])
470 self.buf = self.buf[self.bufsize:]
471
472 def close(self):
473 """Close the _Stream object. No operation should be
474 done on it afterwards.
475 """
476 if self.closed:
477 return
478
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000479 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000480 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000481
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000482 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000483 self.fileobj.write(self.buf)
484 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000485 if self.comptype == "gz":
Tim Petersa05f6e22006-08-02 05:20:08 +0000486 # The native zlib crc is an unsigned 32-bit integer, but
487 # the Python wrapper implicitly casts that to a signed C
488 # long. So, on a 32-bit box self.crc may "look negative",
489 # while the same crc on a 64-bit box may "look positive".
490 # To avoid irksome warnings from the `struct` module, force
491 # it to look positive on all boxes.
492 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000493 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000494
495 if not self._extfileobj:
496 self.fileobj.close()
497
498 self.closed = True
499
500 def _init_read_gz(self):
501 """Initialize for reading a gzip compressed fileobj.
502 """
503 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
504 self.dbuf = ""
505
506 # taken from gzip.GzipFile with some alterations
507 if self.__read(2) != "\037\213":
Georg Brandle4751e32006-05-18 06:11:19 +0000508 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000509 if self.__read(1) != "\010":
Georg Brandle4751e32006-05-18 06:11:19 +0000510 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000511
512 flag = ord(self.__read(1))
513 self.__read(6)
514
515 if flag & 4:
516 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
517 self.read(xlen)
518 if flag & 8:
519 while True:
520 s = self.__read(1)
521 if not s or s == NUL:
522 break
523 if flag & 16:
524 while True:
525 s = self.__read(1)
526 if not s or s == NUL:
527 break
528 if flag & 2:
529 self.__read(2)
530
531 def tell(self):
532 """Return the stream's file pointer position.
533 """
534 return self.pos
535
536 def seek(self, pos=0):
537 """Set the stream's file pointer to pos. Negative seeking
538 is forbidden.
539 """
540 if pos - self.pos >= 0:
541 blocks, remainder = divmod(pos - self.pos, self.bufsize)
542 for i in xrange(blocks):
543 self.read(self.bufsize)
544 self.read(remainder)
545 else:
Georg Brandle4751e32006-05-18 06:11:19 +0000546 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000547 return self.pos
548
549 def read(self, size=None):
550 """Return the next size number of bytes from the stream.
551 If size is not defined, return all bytes of the stream
552 up to EOF.
553 """
554 if size is None:
555 t = []
556 while True:
557 buf = self._read(self.bufsize)
558 if not buf:
559 break
560 t.append(buf)
561 buf = "".join(t)
562 else:
563 buf = self._read(size)
564 self.pos += len(buf)
565 return buf
566
567 def _read(self, size):
568 """Return size bytes from the stream.
569 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000570 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000571 return self.__read(size)
572
573 c = len(self.dbuf)
574 t = [self.dbuf]
575 while c < size:
576 buf = self.__read(self.bufsize)
577 if not buf:
578 break
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000579 try:
580 buf = self.cmp.decompress(buf)
581 except IOError:
582 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000583 t.append(buf)
584 c += len(buf)
585 t = "".join(t)
586 self.dbuf = t[size:]
587 return t[:size]
588
589 def __read(self, size):
590 """Return size bytes from stream. If internal buffer is empty,
591 read another block from the stream.
592 """
593 c = len(self.buf)
594 t = [self.buf]
595 while c < size:
596 buf = self.fileobj.read(self.bufsize)
597 if not buf:
598 break
599 t.append(buf)
600 c += len(buf)
601 t = "".join(t)
602 self.buf = t[size:]
603 return t[:size]
604# class _Stream
605
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000606class _StreamProxy(object):
607 """Small proxy class that enables transparent compression
608 detection for the Stream interface (mode 'r|*').
609 """
610
611 def __init__(self, fileobj):
612 self.fileobj = fileobj
613 self.buf = self.fileobj.read(BLOCKSIZE)
614
615 def read(self, size):
616 self.read = self.fileobj.read
617 return self.buf
618
619 def getcomptype(self):
620 if self.buf.startswith("\037\213\010"):
621 return "gz"
622 if self.buf.startswith("BZh91"):
623 return "bz2"
624 return "tar"
625
626 def close(self):
627 self.fileobj.close()
628# class StreamProxy
629
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000630class _BZ2Proxy(object):
631 """Small proxy class that enables external file object
632 support for "r:bz2" and "w:bz2" modes. This is actually
633 a workaround for a limitation in bz2 module's BZ2File
634 class which (unlike gzip.GzipFile) has no support for
635 a file object argument.
636 """
637
638 blocksize = 16 * 1024
639
640 def __init__(self, fileobj, mode):
641 self.fileobj = fileobj
642 self.mode = mode
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000643 self.name = getattr(self.fileobj, "name", None)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000644 self.init()
645
646 def init(self):
647 import bz2
648 self.pos = 0
649 if self.mode == "r":
650 self.bz2obj = bz2.BZ2Decompressor()
651 self.fileobj.seek(0)
652 self.buf = ""
653 else:
654 self.bz2obj = bz2.BZ2Compressor()
655
656 def read(self, size):
657 b = [self.buf]
658 x = len(self.buf)
659 while x < size:
Lars Gustäbel2020a592009-03-22 20:09:33 +0000660 raw = self.fileobj.read(self.blocksize)
661 if not raw:
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000662 break
Lars Gustäbel2020a592009-03-22 20:09:33 +0000663 data = self.bz2obj.decompress(raw)
664 b.append(data)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000665 x += len(data)
666 self.buf = "".join(b)
667
668 buf = self.buf[:size]
669 self.buf = self.buf[size:]
670 self.pos += len(buf)
671 return buf
672
673 def seek(self, pos):
674 if pos < self.pos:
675 self.init()
676 self.read(pos - self.pos)
677
678 def tell(self):
679 return self.pos
680
681 def write(self, data):
682 self.pos += len(data)
683 raw = self.bz2obj.compress(data)
684 self.fileobj.write(raw)
685
686 def close(self):
687 if self.mode == "w":
688 raw = self.bz2obj.flush()
689 self.fileobj.write(raw)
Georg Brandl49c8f4c2006-05-15 19:30:35 +0000690# class _BZ2Proxy
691
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000692#------------------------
693# Extraction file object
694#------------------------
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000695class _FileInFile(object):
696 """A thin wrapper around an existing file object that
697 provides a part of its data as an individual file
698 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000699 """
700
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000701 def __init__(self, fileobj, offset, size, sparse=None):
702 self.fileobj = fileobj
703 self.offset = offset
704 self.size = size
705 self.sparse = sparse
706 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000708 def tell(self):
709 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000711 return self.position
712
713 def seek(self, position):
714 """Seek to a position in the file.
715 """
716 self.position = position
717
718 def read(self, size=None):
719 """Read data from the file.
720 """
721 if size is None:
722 size = self.size - self.position
723 else:
724 size = min(size, self.size - self.position)
725
726 if self.sparse is None:
727 return self.readnormal(size)
728 else:
729 return self.readsparse(size)
730
731 def readnormal(self, size):
732 """Read operation for regular files.
733 """
734 self.fileobj.seek(self.offset + self.position)
735 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000736 return self.fileobj.read(size)
737
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000738 def readsparse(self, size):
739 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000740 """
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000741 data = []
742 while size > 0:
743 buf = self.readsparsesection(size)
744 if not buf:
745 break
746 size -= len(buf)
747 data.append(buf)
748 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000749
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000750 def readsparsesection(self, size):
751 """Read a single section of a sparse file.
752 """
753 section = self.sparse.find(self.position)
754
755 if section is None:
756 return ""
757
758 size = min(size, section.offset + section.size - self.position)
759
760 if isinstance(section, _data):
761 realpos = section.realpos + self.position - section.offset
762 self.fileobj.seek(self.offset + realpos)
763 self.position += size
764 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000765 else:
Lars Gustäbel6baa5022006-12-23 16:40:13 +0000766 self.position += size
767 return NUL * size
768#class _FileInFile
769
770
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() fetches at a time.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "everything". Without this, the slices
        # below would cut the buffer at a negative index.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            # Data left over from readline() comes first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Fill the buffer until it holds a newline or EOF is hit.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the length of the result.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position
           is clamped to the range 0..size.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
898
899#------------------
900# Exported Classes
901#------------------
902class TarInfo(object):
903 """Informational class which holds the details about an
904 archive member given by a tar header block.
905 TarInfo objects are returned by TarFile.getmember(),
906 TarFile.getmembers() and TarFile.gettarinfo() and are
907 usually created internally.
908 """
909
910 def __init__(self, name=""):
911 """Construct a TarInfo object. name is the optional name
912 of the member.
913 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000914 self.name = name # member name
915 self.mode = 0644 # file permissions
Georg Brandl38c6a222006-05-10 16:26:03 +0000916 self.uid = 0 # user id
917 self.gid = 0 # group id
918 self.size = 0 # file size
919 self.mtime = 0 # modification time
920 self.chksum = 0 # header checksum
921 self.type = REGTYPE # member type
922 self.linkname = "" # link name
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000923 self.uname = "root" # user name
924 self.gname = "root" # group name
Georg Brandl38c6a222006-05-10 16:26:03 +0000925 self.devmajor = 0 # device major number
926 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000927
Georg Brandl38c6a222006-05-10 16:26:03 +0000928 self.offset = 0 # the tar header starts here
929 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000930
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000931 self.pax_headers = {} # pax header information
932
933 # In pax headers the "name" and "linkname" field are called
934 # "path" and "linkpath".
935 def _getpath(self):
936 return self.name
937 def _setpath(self, name):
938 self.name = name
939 path = property(_getpath, _setpath)
940
941 def _getlinkpath(self):
942 return self.linkname
943 def _setlinkpath(self, linkname):
944 self.linkname = linkname
945 linkpath = property(_getlinkpath, _setlinkpath)
946
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000947 def __repr__(self):
948 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
949
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000950 def get_info(self, encoding, errors):
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000951 """Return the TarInfo's attributes as a dictionary.
952 """
953 info = {
Lars Gustäbelf7cda522009-08-28 19:23:44 +0000954 "name": self.name,
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000955 "mode": self.mode & 07777,
956 "uid": self.uid,
957 "gid": self.gid,
958 "size": self.size,
959 "mtime": self.mtime,
960 "chksum": self.chksum,
961 "type": self.type,
Lars Gustäbelf7cda522009-08-28 19:23:44 +0000962 "linkname": self.linkname,
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000963 "uname": self.uname,
964 "gname": self.gname,
965 "devmajor": self.devmajor,
966 "devminor": self.devminor
967 }
968
969 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
970 info["name"] += "/"
971
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000972 for key in ("name", "linkname", "uname", "gname"):
973 if type(info[key]) is unicode:
974 info[key] = info[key].encode(encoding, errors)
975
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000976 return info
977
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000978 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000979 """Return a tar header as a string of 512 byte blocks.
980 """
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000981 info = self.get_info(encoding, errors)
982
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000983 if format == USTAR_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000984 return self.create_ustar_header(info)
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000985 elif format == GNU_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000986 return self.create_gnu_header(info)
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000987 elif format == PAX_FORMAT:
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000988 return self.create_pax_header(info, encoding, errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000989 else:
990 raise ValueError("invalid format")
991
Lars Gustäbela0fcb932007-05-27 19:49:30 +0000992 def create_ustar_header(self, info):
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000993 """Return the object as a ustar header block.
994 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +0000995 info["magic"] = POSIX_MAGIC
996
997 if len(info["linkname"]) > LENGTH_LINK:
998 raise ValueError("linkname is too long")
999
1000 if len(info["name"]) > LENGTH_NAME:
1001 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1002
1003 return self._create_header(info, USTAR_FORMAT)
1004
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001005 def create_gnu_header(self, info):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001006 """Return the object as a GNU header block sequence.
1007 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001008 info["magic"] = GNU_MAGIC
1009
1010 buf = ""
1011 if len(info["linkname"]) > LENGTH_LINK:
1012 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1013
1014 if len(info["name"]) > LENGTH_NAME:
1015 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1016
1017 return buf + self._create_header(info, GNU_FORMAT)
1018
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001019 def create_pax_header(self, info, encoding, errors):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001020 """Return the object as a ustar header block. If it cannot be
1021 represented this way, prepend a pax extended header sequence
1022 with supplement information.
1023 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001024 info["magic"] = POSIX_MAGIC
1025 pax_headers = self.pax_headers.copy()
1026
1027 # Test string fields for values that exceed the field length or cannot
1028 # be represented in ASCII encoding.
1029 for name, hname, length in (
1030 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1031 ("uname", "uname", 32), ("gname", "gname", 32)):
1032
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001033 if hname in pax_headers:
1034 # The pax header has priority.
1035 continue
1036
1037 val = info[name].decode(encoding, errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001038
1039 # Try to encode the string as ASCII.
1040 try:
1041 val.encode("ascii")
1042 except UnicodeEncodeError:
1043 pax_headers[hname] = val
1044 continue
1045
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001046 if len(info[name]) > length:
1047 pax_headers[hname] = val
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001048
1049 # Test number fields for values that exceed the field limit or values
1050 # that like to be stored as float.
1051 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001052 if name in pax_headers:
1053 # The pax header has priority. Avoid overflow.
1054 info[name] = 0
1055 continue
1056
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001057 val = info[name]
1058 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1059 pax_headers[name] = unicode(val)
1060 info[name] = 0
1061
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001062 # Create a pax extended header if necessary.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001063 if pax_headers:
1064 buf = self._create_pax_generic_header(pax_headers)
1065 else:
1066 buf = ""
1067
1068 return buf + self._create_header(info, USTAR_FORMAT)
1069
1070 @classmethod
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001071 def create_pax_global_header(cls, pax_headers):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001072 """Return the object as a pax global header block sequence.
1073 """
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001074 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001075
1076 def _posix_split_name(self, name):
1077 """Split a name longer than 100 chars into a prefix
1078 and a name part.
1079 """
1080 prefix = name[:LENGTH_PREFIX + 1]
1081 while prefix and prefix[-1] != "/":
1082 prefix = prefix[:-1]
1083
1084 name = name[len(prefix):]
1085 prefix = prefix[:-1]
1086
1087 if not prefix or len(name) > LENGTH_NAME:
1088 raise ValueError("name is too long")
1089 return prefix, name
1090
1091 @staticmethod
1092 def _create_header(info, format):
1093 """Return a header block. info is a dictionary with file
1094 information, format must be one of the *_FORMAT constants.
1095 """
1096 parts = [
1097 stn(info.get("name", ""), 100),
1098 itn(info.get("mode", 0) & 07777, 8, format),
1099 itn(info.get("uid", 0), 8, format),
1100 itn(info.get("gid", 0), 8, format),
1101 itn(info.get("size", 0), 12, format),
1102 itn(info.get("mtime", 0), 12, format),
1103 " ", # checksum field
1104 info.get("type", REGTYPE),
1105 stn(info.get("linkname", ""), 100),
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001106 stn(info.get("magic", POSIX_MAGIC), 8),
1107 stn(info.get("uname", "root"), 32),
1108 stn(info.get("gname", "root"), 32),
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001109 itn(info.get("devmajor", 0), 8, format),
1110 itn(info.get("devminor", 0), 8, format),
1111 stn(info.get("prefix", ""), 155)
1112 ]
1113
1114 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1115 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1116 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1117 return buf
1118
1119 @staticmethod
1120 def _create_payload(payload):
1121 """Return the string payload filled with zero bytes
1122 up to the next 512 byte border.
1123 """
1124 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1125 if remainder > 0:
1126 payload += (BLOCKSIZE - remainder) * NUL
1127 return payload
1128
1129 @classmethod
1130 def _create_gnu_long_header(cls, name, type):
1131 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1132 for name.
1133 """
1134 name += NUL
1135
1136 info = {}
1137 info["name"] = "././@LongLink"
1138 info["type"] = type
1139 info["size"] = len(name)
1140 info["magic"] = GNU_MAGIC
1141
1142 # create extended header + name blocks.
1143 return cls._create_header(info, USTAR_FORMAT) + \
1144 cls._create_payload(name)
1145
1146 @classmethod
1147 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1148 """Return a POSIX.1-2001 extended or global header sequence
1149 that contains a list of keyword, value pairs. The values
1150 must be unicode objects.
1151 """
1152 records = []
1153 for keyword, value in pax_headers.iteritems():
1154 keyword = keyword.encode("utf8")
1155 value = value.encode("utf8")
1156 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1157 n = p = 0
1158 while True:
1159 n = l + len(str(p))
1160 if n == p:
1161 break
1162 p = n
1163 records.append("%d %s=%s\n" % (p, keyword, value))
1164 records = "".join(records)
1165
1166 # We use a hardcoded "././@PaxHeader" name like star does
1167 # instead of the one that POSIX recommends.
1168 info = {}
1169 info["name"] = "././@PaxHeader"
1170 info["type"] = type
1171 info["size"] = len(records)
1172 info["magic"] = POSIX_MAGIC
1173
1174 # Create pax header + record blocks.
1175 return cls._create_header(info, USTAR_FORMAT) + \
1176 cls._create_payload(records)
1177
Guido van Rossum75b64e62005-01-16 00:16:11 +00001178 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001179 def frombuf(cls, buf):
1180 """Construct a TarInfo object from a 512 byte string buffer.
1181 """
Georg Brandl38c6a222006-05-10 16:26:03 +00001182 if len(buf) != BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +00001183 raise HeaderError("truncated header")
Georg Brandl38c6a222006-05-10 16:26:03 +00001184 if buf.count(NUL) == BLOCKSIZE:
Georg Brandlebbeed72006-12-19 22:06:46 +00001185 raise HeaderError("empty header")
1186
Georg Brandlded1c4d2006-12-20 11:55:16 +00001187 chksum = nti(buf[148:156])
Georg Brandlebbeed72006-12-19 22:06:46 +00001188 if chksum not in calc_chksums(buf):
1189 raise HeaderError("bad checksum")
Georg Brandl38c6a222006-05-10 16:26:03 +00001190
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001191 obj = cls()
1192 obj.buf = buf
1193 obj.name = nts(buf[0:100])
1194 obj.mode = nti(buf[100:108])
1195 obj.uid = nti(buf[108:116])
1196 obj.gid = nti(buf[116:124])
1197 obj.size = nti(buf[124:136])
1198 obj.mtime = nti(buf[136:148])
1199 obj.chksum = chksum
1200 obj.type = buf[156:157]
1201 obj.linkname = nts(buf[157:257])
1202 obj.uname = nts(buf[265:297])
1203 obj.gname = nts(buf[297:329])
1204 obj.devmajor = nti(buf[329:337])
1205 obj.devminor = nti(buf[337:345])
1206 prefix = nts(buf[345:500])
Georg Brandl3354f282006-10-29 09:16:12 +00001207
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001208 # Old V7 tar format represents a directory as a regular
1209 # file with a trailing slash.
1210 if obj.type == AREGTYPE and obj.name.endswith("/"):
1211 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001212
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001213 # Remove redundant slashes from directories.
1214 if obj.isdir():
1215 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001216
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001217 # Reconstruct a ustar longname.
1218 if prefix and obj.type not in GNU_TYPES:
1219 obj.name = prefix + "/" + obj.name
1220 return obj
1221
1222 @classmethod
1223 def fromtarfile(cls, tarfile):
1224 """Return the next TarInfo object from TarFile object
1225 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001226 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001227 buf = tarfile.fileobj.read(BLOCKSIZE)
1228 if not buf:
1229 return
1230 obj = cls.frombuf(buf)
1231 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1232 return obj._proc_member(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001233
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001234 #--------------------------------------------------------------------------
1235 # The following are methods that are called depending on the type of a
1236 # member. The entry point is _proc_member() which can be overridden in a
1237 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1238 # implement the following
1239 # operations:
1240 # 1. Set self.offset_data to the position where the data blocks begin,
1241 # if there is data that follows.
1242 # 2. Set tarfile.offset to the position where the next member's header will
1243 # begin.
1244 # 3. Return self or another valid TarInfo object.
1245 def _proc_member(self, tarfile):
1246 """Choose the right processing method depending on
1247 the type and call it.
Georg Brandl3354f282006-10-29 09:16:12 +00001248 """
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001249 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1250 return self._proc_gnulong(tarfile)
1251 elif self.type == GNUTYPE_SPARSE:
1252 return self._proc_sparse(tarfile)
1253 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1254 return self._proc_pax(tarfile)
1255 else:
1256 return self._proc_builtin(tarfile)
Georg Brandl3354f282006-10-29 09:16:12 +00001257
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001258 def _proc_builtin(self, tarfile):
1259 """Process a builtin type or an unknown type which
1260 will be treated as a regular file.
1261 """
1262 self.offset_data = tarfile.fileobj.tell()
1263 offset = self.offset_data
1264 if self.isreg() or self.type not in SUPPORTED_TYPES:
1265 # Skip the following data blocks.
1266 offset += self._block(self.size)
1267 tarfile.offset = offset
Georg Brandl3354f282006-10-29 09:16:12 +00001268
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001269 # Patch the TarInfo object with saved global
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001270 # header information.
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001271 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001272
1273 return self
1274
1275 def _proc_gnulong(self, tarfile):
1276 """Process the blocks that hold a GNU longname
1277 or longlink member.
1278 """
1279 buf = tarfile.fileobj.read(self._block(self.size))
1280
1281 # Fetch the next header and process it.
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001282 next = self.fromtarfile(tarfile)
1283 if next is None:
1284 raise HeaderError("missing subsequent header")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001285
1286 # Patch the TarInfo object from the next header with
1287 # the longname information.
1288 next.offset = self.offset
1289 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001290 next.name = nts(buf)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001291 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001292 next.linkname = nts(buf)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001293
1294 return next
1295
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

           The sparse map is collected as a sequence of _hole and
           _data sections in self.sparse. Afterwards self.size is
           replaced by the original file size taken from the header
           and tarfile.offset is advanced past the stored data.
        """
        buf = self.buf
        sp = _ringbuffer()
        # The sparse structs start at byte 386 of the header block.
        pos = 386
        # lastpos: end of the previous data section in the original file.
        # realpos: running total of the data bytes seen so far.
        lastpos = 0L
        realpos = 0L
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                # Each struct is 24 bytes: 12 for the offset and 12
                # for the number of bytes.
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                # A field that nti() cannot convert ends the list.
                break
            if offset > lastpos:
                # Gap between two data sections.
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # An extension block holds up to 21 sparse structs.
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            # The flag of an extension block is stored at byte 504.
            isextended = ord(buf[504])

        if lastpos < origsize:
            # Trailing hole up to the original file size.
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the size from the header block here.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self
1351
1352 def _proc_pax(self, tarfile):
1353 """Process an extended or global header as described in
1354 POSIX.1-2001.
1355 """
1356 # Read the header information.
1357 buf = tarfile.fileobj.read(self._block(self.size))
1358
1359 # A pax header stores supplemental information for either
1360 # the following file (extended) or all following files
1361 # (global).
1362 if self.type == XGLTYPE:
1363 pax_headers = tarfile.pax_headers
1364 else:
1365 pax_headers = tarfile.pax_headers.copy()
1366
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001367 # Parse pax header information. A record looks like that:
1368 # "%d %s=%s\n" % (length, keyword, value). length is the size
1369 # of the complete record including the length field itself and
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001370 # the newline. keyword and value are both UTF-8 encoded strings.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001371 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1372 pos = 0
1373 while True:
1374 match = regex.match(buf, pos)
1375 if not match:
1376 break
1377
1378 length, keyword = match.groups()
1379 length = int(length)
1380 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1381
1382 keyword = keyword.decode("utf8")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001383 value = value.decode("utf8")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001384
1385 pax_headers[keyword] = value
1386 pos += length
1387
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001388 # Fetch the next header.
1389 next = self.fromtarfile(tarfile)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001390
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001391 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1392 if next is None:
1393 raise HeaderError("missing subsequent header")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001394
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001395 # Patch the TarInfo object with the extended header info.
1396 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1397 next.offset = self.offset
1398
Brett Cannon132fc542008-08-04 21:23:07 +00001399 if "size" in pax_headers:
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001400 # If the extended header replaces the size field,
1401 # we need to recalculate the offset where the next
1402 # header starts.
1403 offset = next.offset_data
1404 if next.isreg() or next.type not in SUPPORTED_TYPES:
1405 offset += next._block(next.size)
1406 tarfile.offset = offset
1407
1408 return next
1409
1410 def _apply_pax_info(self, pax_headers, encoding, errors):
1411 """Replace fields with supplemental information from a previous
1412 pax extended or global header.
1413 """
1414 for keyword, value in pax_headers.iteritems():
1415 if keyword not in PAX_FIELDS:
1416 continue
1417
1418 if keyword == "path":
1419 value = value.rstrip("/")
1420
1421 if keyword in PAX_NUMBER_FIELDS:
1422 try:
1423 value = PAX_NUMBER_FIELDS[keyword](value)
1424 except ValueError:
1425 value = 0
1426 else:
1427 value = uts(value, encoding, errors)
1428
1429 setattr(self, keyword, value)
1430
1431 self.pax_headers = pax_headers.copy()
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001432
1433 def _block(self, count):
1434 """Round up a byte count by BLOCKSIZE and return it,
1435 e.g. _block(834) => 1024.
1436 """
1437 blocks, remainder = divmod(count, BLOCKSIZE)
1438 if remainder:
1439 blocks += 1
1440 return blocks * BLOCKSIZE
Georg Brandl3354f282006-10-29 09:16:12 +00001441
    # Type predicates. Each one compares self.type with the matching
    # type constant(s).
    def isreg(self):
        """Return True if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member type is CHRTYPE, BLKTYPE or
           FIFOTYPE.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1462# class TarInfo
1463
1464class TarFile(object):
1465 """The TarFile Class provides an interface to tar archives.
1466 """
1467
1468 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1469
1470 dereference = False # If true, add content of linked file to the
1471 # tar file, else the link.
1472
1473 ignore_zeros = False # If true, skips empty or invalid blocks and
1474 # continues processing.
1475
1476 errorlevel = 0 # If 0, fatal errors only appear in debug
1477 # messages (if debug >= 0). If > 0, errors
1478 # are passed to the caller as exceptions.
1479
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001480 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001481
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001482 encoding = ENCODING # Encoding for 8-bit character strings.
1483
1484 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001485
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001486 tarinfo = TarInfo # The default TarInfo class to use.
1487
1488 fileobject = ExFileObject # The default ExFileObject class to use.
1489
1490 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1491 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001492 errors=None, pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1494 read from an existing archive, 'a' to append data to an existing
1495 file or 'w' to create a new file overwriting an existing one. `mode'
1496 defaults to 'r'.
1497 If `fileobj' is given, it is used for reading or writing data. If it
1498 can be determined, `mode' is overridden by `fileobj's mode.
1499 `fileobj' is not closed, when TarFile is closed.
1500 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001501 if len(mode) > 1 or mode not in "raw":
Georg Brandle4751e32006-05-18 06:11:19 +00001502 raise ValueError("mode must be 'r', 'a' or 'w'")
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001503 self.mode = mode
1504 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001505
1506 if not fileobj:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001507 if self.mode == "a" and not os.path.exists(name):
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001508 # Create nonexistent files in append mode.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001509 self.mode = "w"
1510 self._mode = "wb"
Brett Cannon6cef0762007-05-25 20:17:15 +00001511 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001512 self._extfileobj = False
1513 else:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001514 if name is None and hasattr(fileobj, "name"):
1515 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001516 if hasattr(fileobj, "mode"):
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001517 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001518 self._extfileobj = True
Lars Gustäbel0f4a14b2007-08-28 12:31:09 +00001519 self.name = os.path.abspath(name) if name else None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001520 self.fileobj = fileobj
1521
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001522 # Init attributes.
1523 if format is not None:
1524 self.format = format
1525 if tarinfo is not None:
1526 self.tarinfo = tarinfo
1527 if dereference is not None:
1528 self.dereference = dereference
1529 if ignore_zeros is not None:
1530 self.ignore_zeros = ignore_zeros
1531 if encoding is not None:
1532 self.encoding = encoding
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001533
1534 if errors is not None:
1535 self.errors = errors
1536 elif mode == "r":
1537 self.errors = "utf-8"
1538 else:
1539 self.errors = "strict"
1540
1541 if pax_headers is not None and self.format == PAX_FORMAT:
1542 self.pax_headers = pax_headers
1543 else:
1544 self.pax_headers = {}
1545
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001546 if debug is not None:
1547 self.debug = debug
1548 if errorlevel is not None:
1549 self.errorlevel = errorlevel
1550
1551 # Init datastructures.
Georg Brandl38c6a222006-05-10 16:26:03 +00001552 self.closed = False
1553 self.members = [] # list of members as TarInfo objects
1554 self._loaded = False # flag if all members have been read
Lars Gustäbel77b2d632007-12-01 21:02:12 +00001555 self.offset = self.fileobj.tell()
1556 # current position in the archive file
Georg Brandl38c6a222006-05-10 16:26:03 +00001557 self.inodes = {} # dictionary caching the inodes of
1558 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001559
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001560 if self.mode == "r":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001561 self.firstmember = None
1562 self.firstmember = self.next()
1563
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001564 if self.mode == "a":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001565 # Move to the end of the archive,
1566 # before the first empty block.
1567 self.firstmember = None
1568 while True:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001569 if self.next() is None:
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001570 if self.offset > 0:
1571 self.fileobj.seek(- BLOCKSIZE, 1)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001572 break
1573
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001574 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001575 self._loaded = True
1576
Lars Gustäbela0fcb932007-05-27 19:49:30 +00001577 if self.pax_headers:
1578 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001579 self.fileobj.write(buf)
1580 self.offset += len(buf)
1581
1582 def _getposix(self):
1583 return self.format == USTAR_FORMAT
1584 def _setposix(self, value):
1585 import warnings
Philip Jenveyd846f1d2009-05-08 02:28:39 +00001586 warnings.warn("use the format attribute instead", DeprecationWarning,
1587 2)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001588 if value:
1589 self.format = USTAR_FORMAT
1590 else:
1591 self.format = GNU_FORMAT
1592 posix = property(_getposix, _setposix)
1593
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001594 #--------------------------------------------------------------------------
1595 # Below are the classmethods which act as alternate constructors to the
1596 # TarFile class. The open() method is the only one that is needed for
1597 # public use; it is the "super"-constructor and is able to select an
1598 # adequate "sub"-constructor for a particular compression using the mapping
1599 # from OPEN_METH.
1600 #
1601 # This concept allows one to subclass TarFile without losing the comfort of
1602 # the super-constructor. A sub-constructor is registered and made available
1603 # by adding it to the mapping in OPEN_METH.
1604
Guido van Rossum75b64e62005-01-16 00:16:11 +00001605 @classmethod
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001606 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001607 """Open a tar archive for reading, writing or appending. Return
1608 an appropriate TarFile class.
1609
1610 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001611 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001612 'r:' open for reading exclusively uncompressed
1613 'r:gz' open for reading with gzip compression
1614 'r:bz2' open for reading with bzip2 compression
Lars Gustäbel3f8aca12007-02-06 18:38:13 +00001615 'a' or 'a:' open for appending, creating the file if necessary
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001616 'w' or 'w:' open for writing without compression
1617 'w:gz' open for writing with gzip compression
1618 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001619
1620 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001621 'r|' open an uncompressed stream of tar blocks for reading
1622 'r|gz' open a gzip compressed stream of tar blocks
1623 'r|bz2' open a bzip2 compressed stream of tar blocks
1624 'w|' open an uncompressed stream for writing
1625 'w|gz' open a gzip compressed stream for writing
1626 'w|bz2' open a bzip2 compressed stream for writing
1627 """
1628
1629 if not name and not fileobj:
Georg Brandle4751e32006-05-18 06:11:19 +00001630 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001631
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001632 if mode in ("r", "r:*"):
1633 # Find out which *open() is appropriate for opening the file.
1634 for comptype in cls.OPEN_METH:
1635 func = getattr(cls, cls.OPEN_METH[comptype])
Lars Gustäbela7ba6fc2006-12-27 10:30:46 +00001636 if fileobj is not None:
1637 saved_pos = fileobj.tell()
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001638 try:
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001639 return func(name, "r", fileobj, **kwargs)
1640 except (ReadError, CompressionError), e:
Lars Gustäbela7ba6fc2006-12-27 10:30:46 +00001641 if fileobj is not None:
1642 fileobj.seek(saved_pos)
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001643 continue
Georg Brandle4751e32006-05-18 06:11:19 +00001644 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001645
1646 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001647 filemode, comptype = mode.split(":", 1)
1648 filemode = filemode or "r"
1649 comptype = comptype or "tar"
1650
1651 # Select the *open() function according to
1652 # given compression.
1653 if comptype in cls.OPEN_METH:
1654 func = getattr(cls, cls.OPEN_METH[comptype])
1655 else:
Georg Brandle4751e32006-05-18 06:11:19 +00001656 raise CompressionError("unknown compression type %r" % comptype)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001657 return func(name, filemode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001658
1659 elif "|" in mode:
1660 filemode, comptype = mode.split("|", 1)
1661 filemode = filemode or "r"
1662 comptype = comptype or "tar"
1663
1664 if filemode not in "rw":
Georg Brandle4751e32006-05-18 06:11:19 +00001665 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001666
1667 t = cls(name, filemode,
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001668 _Stream(name, filemode, comptype, fileobj, bufsize),
1669 **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001670 t._extfileobj = False
1671 return t
1672
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001673 elif mode in "aw":
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001674 return cls.taropen(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001675
Georg Brandle4751e32006-05-18 06:11:19 +00001676 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001677
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive `name' for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. `name', `mode', `fileobj' and all further keyword
       arguments are passed on to the class constructor, whose result is
       returned.
    """
    # Compare against the complete set of valid modes. A substring test
    # such as `mode not in "raw"' lets the empty string through, because
    # "" is contained in every string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001685
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       `mode' must be exactly 'r' or 'w', otherwise ValueError is raised.
       CompressionError is raised if the gzip module cannot be used, and
       ReadError if opening the archive fails with an IOError. If
       `fileobj' is not given, the file `name' is opened here and is
       closed again should opening the archive fail.
    """
    # A substring test (`mode not in "rw"') would accept the empty
    # string, so compare against the complete set of valid modes.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Only a file that was opened here may be closed here on failure;
    # a file object handed in by the caller stays under their control.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure is passed on unchanged, but must not leak
        # the file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1711
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       `mode' must be exactly 'r' or 'w', otherwise ValueError is raised.
       CompressionError is raised if the bz2 module is not available, and
       ReadError if opening the archive fails with an IOError or an
       EOFError. If `fileobj' is not given, a BZ2File for `name' is
       opened here and is closed again should opening the archive fail.
    """
    # A substring test (`mode not in "rw"') would accept the empty
    # string, so compare against the complete set of valid modes.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File that was opened here may be closed here on failure.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        # A truncated bzip2 stream is reported as EOFError; treat it
        # like any other unreadable archive so that callers which catch
        # ReadError keep working.
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Pass every other failure on unchanged without leaking the
        # file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1736
# Registry of the *open() methods: each compression type name is mapped
# to the name of the classmethod that opens archives of that type.
OPEN_METH = {
    "tar": "taropen",   # plain tar without compression
    "gz": "gzopen",     # tar compressed with gzip
    "bz2": "bz2open"    # tar compressed with bzip2
}
1743
1744 #--------------------------------------------------------------------------
1745 # The public methods which TarFile provides:
1746
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive, which is then padded with zeros to a
       multiple of RECORDSIZE. Closing an already closed TarFile does
       nothing.

       The TarFile is marked as closed, and a file object it owns is
       closed, even if writing the trailing blocks raises an exception.
    """
    if self.closed:
        return

    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the underlying file no matter whether the trailer
        # could be written, so a failed write does not leak it.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1766
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised if the archive has no such member. The lookup
       is delegated to _getmember(), which searches from the end of the
       member list, so for a name that occurs several times the last
       occurrence is returned.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001777
def getmembers(self):
    """Return the list of TarInfo objects for the members of the
       archive, in archive order. The whole archive is scanned first if
       that has not happened yet.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires reading through the archive once.
    self._load()
    return self.members
1787
def getnames(self):
    """Return the names of the members of the archive as a list, in
       the order in which getmembers() returns the members.
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001793
1794 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1795 """Create a TarInfo object for either the file `name' or the file
1796 object `fileobj' (using os.fstat on its file descriptor). You can
1797 modify some of the TarInfo's attributes before you add it using
1798 addfile(). If given, `arcname' specifies an alternative name for the
1799 file in the archive.
1800 """
1801 self._check("aw")
1802
1803 # When fileobj is given, replace name by
1804 # fileobj's real name.
1805 if fileobj is not None:
1806 name = fileobj.name
1807
1808 # Building the name of the member in the archive.
1809 # Backward slashes are converted to forward slashes,
1810 # Absolute paths are turned to relative paths.
1811 if arcname is None:
1812 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001813 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelf7cda522009-08-28 19:23:44 +00001814 arcname = arcname.replace(os.sep, "/")
1815 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001816
1817 # Now, fill the TarInfo object with
1818 # information specific for the file.
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001819 tarinfo = self.tarinfo()
1820 tarinfo.tarfile = self
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001821
1822 # Use os.stat or os.lstat, depending on platform
1823 # and if symlinks shall be resolved.
1824 if fileobj is None:
1825 if hasattr(os, "lstat") and not self.dereference:
1826 statres = os.lstat(name)
1827 else:
1828 statres = os.stat(name)
1829 else:
1830 statres = os.fstat(fileobj.fileno())
1831 linkname = ""
1832
1833 stmd = statres.st_mode
1834 if stat.S_ISREG(stmd):
1835 inode = (statres.st_ino, statres.st_dev)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001836 if not self.dereference and statres.st_nlink > 1 and \
1837 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001838 # Is it a hardlink to an already
1839 # archived file?
1840 type = LNKTYPE
1841 linkname = self.inodes[inode]
1842 else:
1843 # The inode is added only if its valid.
1844 # For win32 it is always 0.
1845 type = REGTYPE
1846 if inode[0]:
1847 self.inodes[inode] = arcname
1848 elif stat.S_ISDIR(stmd):
1849 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001850 elif stat.S_ISFIFO(stmd):
1851 type = FIFOTYPE
1852 elif stat.S_ISLNK(stmd):
1853 type = SYMTYPE
1854 linkname = os.readlink(name)
1855 elif stat.S_ISCHR(stmd):
1856 type = CHRTYPE
1857 elif stat.S_ISBLK(stmd):
1858 type = BLKTYPE
1859 else:
1860 return None
1861
1862 # Fill the TarInfo object with all
1863 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001864 tarinfo.name = arcname
1865 tarinfo.mode = stmd
1866 tarinfo.uid = statres.st_uid
1867 tarinfo.gid = statres.st_gid
1868 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001869 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001870 else:
1871 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001872 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001873 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001874 tarinfo.linkname = linkname
1875 if pwd:
1876 try:
1877 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1878 except KeyError:
1879 pass
1880 if grp:
1881 try:
1882 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1883 except KeyError:
1884 pass
1885
1886 if type in (CHRTYPE, BLKTYPE):
1887 if hasattr(os, "major") and hasattr(os, "minor"):
1888 tarinfo.devmajor = os.major(statres.st_rdev)
1889 tarinfo.devminor = os.minor(statres.st_rdev)
1890 return tarinfo
1891
1892 def list(self, verbose=True):
1893 """Print a table of contents to sys.stdout. If `verbose' is False, only
1894 the names of the members are printed. If it is True, an `ls -l'-like
1895 output is produced.
1896 """
1897 self._check()
1898
1899 for tarinfo in self:
1900 if verbose:
1901 print filemode(tarinfo.mode),
1902 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1903 tarinfo.gname or tarinfo.gid),
1904 if tarinfo.ischr() or tarinfo.isblk():
1905 print "%10s" % ("%d,%d" \
1906 % (tarinfo.devmajor, tarinfo.devminor)),
1907 else:
1908 print "%10d" % tarinfo.size,
1909 print "%d-%02d-%02d %02d:%02d:%02d" \
1910 % time.localtime(tarinfo.mtime)[:6],
1911
Lars Gustäbelc64e4022007-03-13 10:47:19 +00001912 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001913
1914 if verbose:
1915 if tarinfo.issym():
1916 print "->", tarinfo.linkname,
1917 if tarinfo.islnk():
1918 print "link to", tarinfo.linkname,
1919 print
1920
def add(self, name, arcname=None, recursive=True, exclude=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded.

       Nothing is added if `name' is excluded, if it is the archive
       itself, or if gettarinfo() returns None for it.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None and exclude(name):
        self._dbg(2, "tarfile: Excluded %r" % name)
        return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing to the archive
            # fails, so that no file descriptor is leaked.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude)

    else:
        self.addfile(tarinfo)
1967
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by the TarInfo object `tarinfo' to
       the archive. If `fileobj' is given, tarinfo.size bytes are read
       from it and stored as the member's data. TarInfo objects can be
       created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object stays untouched.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Member data, if any, follows the header and is padded
    # with NULs to a whole number of blocks.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            padding = BLOCKSIZE - remainder
            self.fileobj.write(NUL * padding)
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001993
def extractall(self, path=".", members=None):
    """Extract every member of the archive below the directory `path',
       which defaults to the current working directory. Owner,
       modification time and permissions of directories are set only
       after everything has been extracted. `members' is optional and
       must be a subset of the list returned by getmembers().
    """
    if members is None:
        members = self

    postponed = []
    for tarinfo in members:
        if tarinfo.isdir():
            # Directories are created from a copy that carries a safe
            # mode; the original is kept for the fix-up pass below.
            postponed.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path)

    # Handle the directories in reverse order of their names.
    postponed.sort(key=operator.attrgetter('name'))
    postponed.reverse()

    # Now apply the real owner, mtime and mode to each directory.
    for tarinfo in postponed:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2030
def extract(self, member, path=""):
    """Extract one member of the archive under its full name, relative
       to the current working directory or to `path' if that is given.
       `member' may be a filename or a TarInfo object. Depending on
       self.errorlevel, errors are either raised or only reported
       through _dbg().
    """
    self._check("r")

    tarinfo = member
    if isinstance(member, basestring):
        tarinfo = self.getmember(member)

    # makelink() needs to know where the link target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2063
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may
       be a filename or a TarInfo object. For a regular file, or a
       member of an unknown type, a file-like object is returned. For a
       link, the file-like object of the link's target is returned. For
       everything else the result is None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if isinstance(member, basestring):
        tarinfo = self.getmember(member)

    # Members of an unknown type are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices and the like carry no data at all.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A (sym)link cannot be followed on a stream of tar blocks,
    # because that would require seeking back to the target.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
2102
2103 def _extract_member(self, tarinfo, targetpath):
2104 """Extract the TarInfo object tarinfo to a physical
2105 file called targetpath.
2106 """
2107 # Fetch the TarInfo object for the given name
2108 # and build the destination pathname, replacing
2109 # forward slashes to platform specific separators.
Lars Gustäbelf7cda522009-08-28 19:23:44 +00002110 targetpath = targetpath.rstrip("/")
2111 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002112
2113 # Create all upper directories.
2114 upperdirs = os.path.dirname(targetpath)
2115 if upperdirs and not os.path.exists(upperdirs):
Lars Gustäbel0192e432008-02-05 11:51:40 +00002116 # Create directories that are not part of the archive with
2117 # default permissions.
Lars Gustäbeld2e22902007-01-23 11:17:33 +00002118 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002119
2120 if tarinfo.islnk() or tarinfo.issym():
2121 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2122 else:
2123 self._dbg(1, tarinfo.name)
2124
2125 if tarinfo.isreg():
2126 self.makefile(tarinfo, targetpath)
2127 elif tarinfo.isdir():
2128 self.makedir(tarinfo, targetpath)
2129 elif tarinfo.isfifo():
2130 self.makefifo(tarinfo, targetpath)
2131 elif tarinfo.ischr() or tarinfo.isblk():
2132 self.makedev(tarinfo, targetpath)
2133 elif tarinfo.islnk() or tarinfo.issym():
2134 self.makelink(tarinfo, targetpath)
2135 elif tarinfo.type not in SUPPORTED_TYPES:
2136 self.makeunknown(tarinfo, targetpath)
2137 else:
2138 self.makefile(tarinfo, targetpath)
2139
2140 self.chown(tarinfo, targetpath)
2141 if not tarinfo.issym():
2142 self.chmod(tarinfo, targetpath)
2143 self.utime(tarinfo, targetpath)
2144
2145 #--------------------------------------------------------------------------
2146 # Below are the different file methods. They are called via
2147 # _extract_member() when extract() is called. They can be replaced in a
2148 # subclass to implement other functionality.
2149
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that exists
       already is not an error.
    """
    try:
        # The directory gets a safe mode here; the mode stored in the
        # archive is applied by the caller afterwards.
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as err:
        if err.errno == errno.EEXIST:
            return
        raise
2160
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
       member `tarinfo', which is obtained through extractfile().
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        # Both file objects are closed even if opening the target or
        # copying the data fails, so that neither of them is leaked.
        source.close()
2169
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of an unknown type to `targetpath' as if it
       were a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file." \
              % tarinfo.type
    self._dbg(1, message)
2177
def makefifo(self, tarinfo, targetpath):
    """Create the fifo `targetpath'. ExtractError is raised if the
       platform has no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002185
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath'. ExtractError
       is raised if the platform lacks os.mknod() or os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the stored mode with the matching device type bit.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2200
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'. If an
       AttributeError shows that the link cannot be created (platform
       limitation), a copy of the referenced file is made instead.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Work out the archive name of the member the link refers to.
        if tarinfo.issym():
            linkpath = "/".join((os.path.dirname(tarinfo.name),
                                 tarinfo.linkname))
        else:
            linkpath = tarinfo.linkname

        try:
            # First choice: extract the referenced member once more.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the referenced file from the
            # filesystem.
            linkpath = linkpath.replace("/", os.sep)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002227
def chown(self, tarinfo, targetpath):
    """Change the owner of `targetpath' to the one recorded in
       `tarinfo'. Nothing is done unless the effective user id is 0.
       ExtractError is raised if the owner cannot be changed.
    """
    # Only root is able to give files away.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Group: prefer the stored name, then the stored id, then the
    # group of the current process.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # User: the same order of preference.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002255
def chmod(self, tarinfo, targetpath):
    """Apply the permissions recorded in `tarinfo' to `targetpath'.
       Nothing is done on platforms without os.chmod(). ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002264
def utime(self, tarinfo, targetpath):
    """Apply the modification time recorded in `tarinfo' to
       `targetpath'; the access time is set to the same value. Nothing
       is done on platforms without os.utime(). ExtractError is raised
       if the time cannot be changed.
    """
    if hasattr(os, 'utime'):
        stamp = (tarinfo.mtime, tarinfo.mtime)
        try:
            os.utime(targetpath, stamp)
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002274
2275 #--------------------------------------------------------------------------
def next(self):
    """Return the TarInfo object of the next member of an archive that
       is open for reading, or None if there are no more members.
    """
    self._check("ra")

    # A member that was read ahead is handed out first.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Continue reading at the current offset.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
            if tarinfo is None:
                return
            self.members.append(tarinfo)
        except HeaderError as e:
            if not self.ignore_zeros:
                # A bad header at the very start means this is not
                # a tar archive; anywhere else it ends the archive.
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
            # Skip the bad block and try the one after it.
            self._dbg(2, "0x%X: %s" % (self.offset, e))
            self.offset += BLOCKSIZE
        else:
            return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002308
2309 #--------------------------------------------------------------------------
2310 # Little helper methods:
2311
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002312 def _getmember(self, name, tarinfo=None):
2313 """Find an archive member by name from bottom to top.
2314 If tarinfo is given, it is used as the starting point.
2315 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002316 # Ensure that all members have been loaded.
2317 members = self.getmembers()
2318
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002319 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002320 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002321 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002322 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002323
2324 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002325 if name == members[i].name:
2326 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002327
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002328 def _load(self):
2329 """Read through the entire archive file and look for readable
2330 members.
2331 """
2332 while True:
2333 tarinfo = self.next()
2334 if tarinfo is None:
2335 break
2336 self._loaded = True
2337
2338 def _check(self, mode=None):
2339 """Check if TarFile is still open, and if the operation's mode
2340 corresponds to TarFile's mode.
2341 """
2342 if self.closed:
Georg Brandle4751e32006-05-18 06:11:19 +00002343 raise IOError("%s is closed" % self.__class__.__name__)
Lars Gustäbelc64e4022007-03-13 10:47:19 +00002344 if mode is not None and self.mode not in mode:
2345 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002346
2347 def __iter__(self):
2348 """Provide an iterator object.
2349 """
2350 if self._loaded:
2351 return iter(self.members)
2352 else:
2353 return TarIter(self)
2354
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002355 def _dbg(self, level, msg):
2356 """Write debugging output to sys.stderr.
2357 """
2358 if level <= self.debug:
2359 print >> sys.stderr, msg
2360# class TarFile
2361
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for the TarFile object `tarfile'.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not loaded it is
           fetched with the TarFile's next() method, and the TarFile is
           marked as _loaded once that reports no further member.
        """
        archive = self.tarfile
        # Fix for SF #1100429: getmembers() may be called while the
        # iteration is running and load the whole archive. From then
        # on the members are taken from the list by position, so the
        # iteration does not stop prematurely.
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2397
2398# Helper classes for sparse file support
2399class _section:
2400 """Base class for _data and _hole.
2401 """
2402 def __init__(self, offset, size):
2403 self.offset = offset
2404 self.size = size
2405 def __contains__(self, offset):
2406 return self.offset <= offset < self.offset + self.size
2407
class _data(_section):
    """A data section of a sparse file. In addition to offset and size
       it stores `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2414
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section.
    """
2419
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       Lookups start at the position of the previous hit and wrap
       around, so consecutive searches for nearby offsets are cheap.
    """

    def __init__(self):
        list.__init__(self)
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, scanning from the last hit, that
           contains `offset' (tested with the `in' operator), or None
           if no item does.

           An empty buffer yields None instead of raising IndexError,
           and a start index left beyond the end of the list (e.g.
           after items were removed) wraps around instead of failing.
        """
        count = len(self)
        if not count:
            # Nothing to search: same result as "End of File".
            return None
        start = self.idx
        for step in range(count):
            idx = (start + step) % count
            item = self[idx]
            if offset in item:
                # Remember the hit so the next search starts here.
                self.idx = idx
                return item
        # Every item was visited once without a match: End of File.
        return None
2440
2441#---------------------------------------------
2442# zipfile compatible TarFile class
2443#---------------------------------------------
# Compression constants accepted by TarFileCompat; the values mirror
# the corresponding zipfile constants.
TAR_PLAIN = 0       # zipfile.ZIP_STORED
TAR_GZIPPED = 8     # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """

    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' as a plain (TAR_PLAIN) or gzip-compressed
           (TAR_GZIPPED) archive. Any other compression value raises
           ValueError.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                 stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            # Give every member the attribute names that ZipInfo
            # objects expose.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members reported by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive listing via TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return None

    def getinfo(self, name):
        """Return the member called `name'.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called `name'.
        """
        member = self.tarfile.getmember(name)
        fileobj = self.tarfile.extractfile(member)
        return fileobj.read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive under `arcname'.
           `compress_type' is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' to the archive as a member whose
           name and modification time are taken from `zinfo'.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        info = TarInfo(zinfo.filename)
        info.size = len(bytes)
        # date_time is converted to a timestamp with calendar.timegm().
        info.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(info, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2493#class TarFileCompat
2494
2495#--------------------
2496# exported functions
2497#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       Only TarError is treated as "not a tar archive"; any other
       exception raised while opening propagates to the caller.
    """
    try:
        # `open' is looked up at call time, i.e. it is the module-level
        # open() defined at the end of this file, not the builtin.
        open(name).close()
    except TarError:
        return False
    else:
        return True
2508
# Keep the builtin open() reachable under another name before the
# module-level name `open' is rebound below.
bltn_open = open
# Make TarFile.open available as the module-level open() function.
open = TarFile.open