blob: efade27e119d05d9babc3ab8338d951c1147ea9e [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Guido van Rossumd8faa362007-04-27 19:54:29 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000052import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000053import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
if sys.platform == "mac":
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    # The exception is raised in call form, like every other raise in this
    # module.
    raise ImportError("tarfile does not work for platform==mac")
61
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000062try:
63 import grp, pwd
64except ImportError:
65 grp = pwd = None
66
67# from tarfile import *
68__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69
Guido van Rossum8f78fe92006-08-24 04:03:53 +000070from __builtin__ import open as _open # Since 'open' is TarFile.open
71
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000072#---------------------------------------------------------
73# tar constants
74#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# The magic and version header fields together occupy 8 bytes.
# GNU tar fills them with "ustar", two spaces and a NUL; POSIX uses
# "ustar", a NUL and the version digits "00".
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (NUL type flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000107
108#---------------------------------------------------------
109# tarfile constants
110#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
# "path" and "linkpath" are the pax names for TarInfo.name and
# TarInfo.linkname.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000129
130#---------------------------------------------------------
131# Bits used in the mode field, values in octal.
132#---------------------------------------------------------
# File type bits (tested by filemode() with "mode & bit == bit").
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Regular permission bits: owner, group, other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
153
154#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000155# initialization
156#---------------------------------------------------------
# Default encoding, used as the default for TarInfo.tobuf(encoding=...).
# sys.getfilesystemencoding() may return None, in which case plain
# ASCII is used.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = "ascii"
160
161#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000162# Some useful functions
163#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164
def stn(s, length):
    """Return s as a fixed-width field of exactly length characters.

    Strings longer than length are truncated; shorter strings are
    padded on the right with NUL characters.
    """
    field = s[:length]
    # A negative repeat count yields "", so no padding is added when
    # s already fills (or overflows) the field.
    padding = NUL * (length - len(s))
    return field + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000169
def nts(s):
    """Return the part of a header field that precedes its first NUL.

    If the field contains no NUL character it is returned unchanged.
    """
    end = s.find("\0")
    return s if end < 0 else s[:end]
178
def nti(s):
    """Decode a numeric header field into a python number.

    Two encodings exist (see itn()): a NUL-terminated string of octal
    digits, or -- marked by a leading 0200 byte -- a big-endian binary
    number stored in the remaining bytes.

    Raises HeaderError if an octal field does not parse.
    """
    if s[0] == "\200":
        # Binary encoding: fold the bytes after the marker, most
        # significant first.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # Octal encoding: an empty field counts as zero.
    try:
        return int(nts(s) or "0", 8)
    except ValueError:
        raise HeaderError("invalid header")
195
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode a python number as a header number field of digits bytes.

    POSIX 1003.1-1988 stores numbers as digits-1 octal digits followed
    by a NUL, which covers values up to (8**(digits-1))-1.  GNU tar
    additionally allows a leading 0200 byte followed by a big-endian
    representation in the remaining digits-1 bytes, which covers
    values up to (256**(digits-1))-1.

    Raises ValueError if n does not fit the octal form and either the
    format is not GNU_FORMAT or n is too large for the binary form.
    """
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Peel off the low-order byte width times, then reverse so the
    # most significant byte comes first.
    payload = []
    for _ in range(width):
        payload.append(chr(n & 0xFF))
        n >>= 8
    payload.reverse()
    return "\200" + "".join(payload)
222
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a 512 byte header.

    All header bytes are summed, except that the 8 byte chksum field
    (offsets 148-155) counts as if it held spaces -- hence the constant
    256 (8 * 0x20).  According to the GNU tar sources, some tars (Sun
    and NeXT) sum the bytes as signed chars, which gives a different
    result when bytes with the high bit set are present, so both
    variants are computed.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000235
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None the entire remaining content of src is copied.
    Otherwise exactly length bytes are transferred in chunks of at
    most 16 KiB, and IOError is raised if src runs out early.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(nbytes):
        # Move one chunk; a short read means src ended prematurely.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_chunks, tail = divmod(length, BUFSIZE)

    done = 0
    while done < full_chunks:
        transfer(BUFSIZE)
        done += 1

    if tail != 0:
        transfer(tail)
    return
260
# Lookup table for filemode().  Each entry of the outer tuple produces
# one character of the result: the file type first, then read, write
# and execute for owner, group and other.  An entry is a tuple of
# (bits, char) alternatives that are tried in order; the first one
# whose bits are all set in the mode wins, so combined masks such as
# TUEXEC|TSUID must precede their components.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000287
def filemode(mode):
    """Render a file's mode bits as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        # Keep the first alternative whose bits are all present in
        # mode; "-" marks a position where none applies.
        matches = [char for bit, char in alternatives if mode & bit == bit]
        chars.append(matches[0] if matches else "-")
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000302
# Member names always use "/" as separator, whatever the local
# convention is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert os.sep to forward slashes."""
        return os.path.normpath(path).replace(os.sep, "/")
307
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000326
327#---------------------------
328# internal stream interface
329#---------------------------
330class _LowLevelFile:
331 """Low-level file object. Supports reading and writing.
332 It is used instead of a regular file object for streaming
333 access.
334 """
335
336 def __init__(self, name, mode):
337 mode = {
338 "r": os.O_RDONLY,
339 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
340 }[mode]
341 if hasattr(os, "O_BINARY"):
342 mode |= os.O_BINARY
343 self.fd = os.open(name, mode)
344
345 def close(self):
346 os.close(self.fd)
347
348 def read(self, size):
349 return os.read(self.fd, size)
350
351 def write(self, s):
352 os.write(self.fd, s)
353
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened with _LowLevelFile if fileobj
                       is None, and stored in the gzip header on write
           mode     -- "r" or "w"
           comptype -- "tar", "gz", "bz2", or "*" to detect the
                       compression from the data (reading only)
           fileobj  -- object with read()/write(), or None
           bufsize  -- size of the blocks exchanged with fileobj

           Raises CompressionError if the required compression module
           is not available.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""
        self.pos      = 0
        self.closed   = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and
        # trailer are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long.  So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Raises ReadError if the gzip magic is missing and
           CompressionError if the method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)          # skip mtime, extra flags and os

        if flag & 4:
            # FEXTRA: the extra field is part of the uncompressed
            # header, so it must be skipped with the raw __read().
            # read() would feed these bytes to the decompressor and
            # advance the stream position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: header checksum
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raises StreamError if pos lies before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           Less is returned only at the end of the stream.  Raises
           ReadError if the decompressor reports an IOError.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress whole blocks until enough data is available and
        # keep the surplus in self.dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except IOError:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
580
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000581class _StreamProxy(object):
582 """Small proxy class that enables transparent compression
583 detection for the Stream interface (mode 'r|*').
584 """
585
586 def __init__(self, fileobj):
587 self.fileobj = fileobj
588 self.buf = self.fileobj.read(BLOCKSIZE)
589
590 def read(self, size):
591 self.read = self.fileobj.read
592 return self.buf
593
594 def getcomptype(self):
595 if self.buf.startswith("\037\213\010"):
596 return "gz"
597 if self.buf.startswith("BZh91"):
598 return "bz2"
599 return "tar"
600
601 def close(self):
602 self.fileobj.close()
603# class StreamProxy
604
Thomas Wouters477c8d52006-05-27 19:21:47 +0000605class _BZ2Proxy(object):
606 """Small proxy class that enables external file object
607 support for "r:bz2" and "w:bz2" modes. This is actually
608 a workaround for a limitation in bz2 module's BZ2File
609 class which (unlike gzip.GzipFile) has no support for
610 a file object argument.
611 """
612
613 blocksize = 16 * 1024
614
615 def __init__(self, fileobj, mode):
616 self.fileobj = fileobj
617 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000618 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000619 self.init()
620
621 def init(self):
622 import bz2
623 self.pos = 0
624 if self.mode == "r":
625 self.bz2obj = bz2.BZ2Decompressor()
626 self.fileobj.seek(0)
627 self.buf = ""
628 else:
629 self.bz2obj = bz2.BZ2Compressor()
630
631 def read(self, size):
632 b = [self.buf]
633 x = len(self.buf)
634 while x < size:
635 try:
636 raw = self.fileobj.read(self.blocksize)
637 data = self.bz2obj.decompress(raw)
638 b.append(data)
639 except EOFError:
640 break
641 x += len(data)
642 self.buf = "".join(b)
643
644 buf = self.buf[:size]
645 self.buf = self.buf[size:]
646 self.pos += len(buf)
647 return buf
648
649 def seek(self, pos):
650 if pos < self.pos:
651 self.init()
652 self.read(pos - self.pos)
653
654 def tell(self):
655 return self.pos
656
657 def write(self, data):
658 self.pos += len(data)
659 raw = self.bz2obj.compress(data)
660 self.fileobj.write(raw)
661
662 def close(self):
663 if self.mode == "w":
664 raw = self.bz2obj.flush()
665 self.fileobj.write(raw)
666 self.fileobj.close()
667# class _BZ2Proxy
668
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000669#------------------------
670# Extraction file object
671#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000672class _FileInFile(object):
673 """A thin wrapper around an existing file object that
674 provides a part of its data as an individual file
675 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000676 """
677
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000678 def __init__(self, fileobj, offset, size, sparse=None):
679 self.fileobj = fileobj
680 self.offset = offset
681 self.size = size
682 self.sparse = sparse
683 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000684
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000685 def tell(self):
686 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000687 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000688 return self.position
689
690 def seek(self, position):
691 """Seek to a position in the file.
692 """
693 self.position = position
694
695 def read(self, size=None):
696 """Read data from the file.
697 """
698 if size is None:
699 size = self.size - self.position
700 else:
701 size = min(size, self.size - self.position)
702
703 if self.sparse is None:
704 return self.readnormal(size)
705 else:
706 return self.readsparse(size)
707
708 def readnormal(self, size):
709 """Read operation for regular files.
710 """
711 self.fileobj.seek(self.offset + self.position)
712 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000713 return self.fileobj.read(size)
714
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000715 def readsparse(self, size):
716 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000717 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000718 data = []
719 while size > 0:
720 buf = self.readsparsesection(size)
721 if not buf:
722 break
723 size -= len(buf)
724 data.append(buf)
725 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000726
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000727 def readsparsesection(self, size):
728 """Read a single section of a sparse file.
729 """
730 section = self.sparse.find(self.position)
731
732 if section is None:
733 return ""
734
735 size = min(size, section.offset + section.size - self.position)
736
737 if isinstance(section, _data):
738 realpos = section.realpos + self.position - section.offset
739 self.fileobj.seek(self.offset + realpos)
740 self.position += size
741 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000742 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000743 self.position += size
744 return NUL * size
745#class _FileInFile
746
747
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""        # data read ahead by readline()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Like builtin files, treat a negative size as "no limit"
        # rather than passing a negative count down to the reader.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            # Serve data left over from readline() first.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line length.  Any other
        # value (None or negative) means "no limit"; using it as a
        # slice bound would cut the line at the wrong place.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           The resulting position is clamped to the range from 0 to
           the size of the member.  Raises ValueError if the file is
           closed or whence is invalid.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Data read ahead belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
875
876#------------------
877# Exported Classes
878#------------------
879class TarInfo(object):
880 """Informational class which holds the details about an
881 archive member given by a tar header block.
882 TarInfo objects are returned by TarFile.getmember(),
883 TarFile.getmembers() and TarFile.gettarinfo() and are
884 usually created internally.
885 """
886
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.

           All other attributes start out with the defaults below.
        """
        self.name = name        # member name
        self.mode = 0644        # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information
909
# pax headers use the keywords "path" and "linkpath" for what this
# class stores as "name" and "linkname"; the two properties below are
# plain aliases for those attributes.
def _getpath(self):
    """Return the member name (pax keyword "path")."""
    return self.name

def _setpath(self, name):
    """Set the member name (pax keyword "path")."""
    self.name = name

path = property(_getpath, _setpath)

def _getlinkpath(self):
    """Return the link name (pax keyword "linkpath")."""
    return self.linkname

def _setlinkpath(self, linkname):
    """Set the link name (pax keyword "linkpath")."""
    self.linkname = linkname

linkpath = property(_getlinkpath, _setlinkpath)
923
def __repr__(self):
    """Return "<ClassName 'member-name' at 0xADDRESS>"."""
    class_name = self.__class__.__name__
    return "<%s %r at %#x>" % (class_name, self.name, id(self))
926
def get_info(self):
    """Return the header fields of this TarInfo as a dictionary.

    The name and a non-empty linkname are passed through normpath(),
    the mode is masked to its permission bits, and directory names
    are given a trailing slash.
    """
    name = normpath(self.name)

    if self.linkname:
        linkname = normpath(self.linkname)
    else:
        linkname = ""

    # Directories are always stored with a trailing slash.
    if self.type == DIRTYPE and not name.endswith("/"):
        name += "/"

    return {
        "name":     name,
        "mode":     self.mode & 0o7777,
        "uid":      self.uid,
        "gid":      self.gid,
        "size":     self.size,
        "mtime":    self.mtime,
        "chksum":   self.chksum,
        "type":     self.type,
        "linkname": linkname,
        "uname":    self.uname,
        "gname":    self.gname,
        "devmajor": self.devmajor,
        "devminor": self.devminor,
    }
950
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING):
    """Serialize this member's header in the requested archive format.

    format must be USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT; any other
    value raises ValueError. encoding is only passed on for the pax
    format.
    """
    if format == USTAR_FORMAT:
        return self.create_ustar_header()
    if format == GNU_FORMAT:
        return self.create_gnu_header()
    if format == PAX_FORMAT:
        return self.create_pax_header(encoding)
    raise ValueError("invalid format")
962
def create_ustar_header(self):
    """Build a ustar header block for this member.

    Raises ValueError if the linkname exceeds LENGTH_LINK, or (from
    _posix_split_name()) if the name cannot be split into a
    prefix/name pair.
    """
    info = self.get_info()
    info["magic"] = POSIX_MAGIC

    if len(info["linkname"]) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    full_name = info["name"]
    if len(full_name) > LENGTH_NAME:
        # Spread an over-long name across the prefix and name fields.
        prefix, short_name = self._posix_split_name(full_name)
        info["prefix"] = prefix
        info["name"] = short_name

    return self._create_header(info, USTAR_FORMAT)
976
def create_gnu_header(self):
    """Build the GNU header block sequence for this member.

    A GNU long-link and/or long-name sequence is emitted in front of
    the regular header when the linkname or name exceeds its field
    length.
    """
    info = self.get_info()
    info["magic"] = GNU_MAGIC

    pieces = []
    linkname = info["linkname"]
    if len(linkname) > LENGTH_LINK:
        pieces.append(self._create_gnu_long_header(linkname, GNUTYPE_LONGLINK))

    name = info["name"]
    if len(name) > LENGTH_NAME:
        pieces.append(self._create_gnu_long_header(name, GNUTYPE_LONGNAME))

    pieces.append(self._create_header(info, GNU_FORMAT))
    return "".join(pieces)
991
def create_pax_header(self, encoding):
    """Build a ustar header block, preceded by a pax extended header
    sequence when some field cannot be represented in plain ustar.

    encoding is used to decode the string fields before they are
    checked.
    """
    info = self.get_info()
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # (info key, pax keyword, maximum field length)
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )

    for field, keyword, limit in string_fields:
        text = info[field].decode(encoding)

        # Anything that is not pure ASCII goes into the pax header.
        try:
            text.encode("ascii")
        except UnicodeEncodeError:
            pax_headers[keyword] = text
            continue

        if len(text) <= limit:
            continue

        if field == "name":
            # A long name may still fit into the ustar prefix/name
            # pair; only fall back to a pax record if it does not.
            try:
                prefix, short_name = self._posix_split_name(info["name"])
            except ValueError:
                pass
            else:
                info["prefix"] = prefix
                info["name"] = short_name
                continue

        pax_headers[keyword] = text

    # Numbers that are negative, too large for their octal field, or
    # floats are moved to the pax header and zeroed in the ustar block.
    for field, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
        number = info[field]
        in_range = 0 <= number < 8 ** (digits - 1)
        if not in_range or isinstance(number, float):
            pax_headers[field] = unicode(number)
            info[field] = 0

    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers)
    else:
        extended = ""

    return extended + self._create_header(info, USTAR_FORMAT)
1043
@classmethod
def create_pax_global_header(cls, pax_headers, encoding):
    """Build a pax global header sequence from the pax_headers mapping.

    Keys and values are converted with _to_unicode() using encoding
    before the header is generated.
    """
    converted = dict(
        (cls._to_unicode(key, encoding), cls._to_unicode(val, encoding))
        for key, val in pax_headers.items())
    return cls._create_pax_generic_header(converted, type=XGLTYPE)
1054
@staticmethod
def _to_unicode(value, encoding):
    """Convert value to a unicode object.

    unicode objects are returned unchanged, ints and floats are
    converted with unicode(), and byte strings are decoded with
    encoding. Any other type raises ValueError.
    """
    if isinstance(value, unicode):
        return value
    if isinstance(value, (int, float)):
        return unicode(value)
    if isinstance(value, str):
        return unicode(value, encoding)
    raise ValueError("unable to convert to unicode: %r" % value)
1065
def _posix_split_name(self, name):
    """Split an over-long name into a (prefix, name) pair.

    The split happens at the last "/" within the first
    LENGTH_PREFIX + 1 characters. ValueError is raised when there is
    no such slash, the prefix would be empty, or the remaining name
    is still longer than LENGTH_NAME.
    """
    head = name[:LENGTH_PREFIX + 1]
    cut = head.rfind("/")

    if cut < 0:
        # No slash to split at: nothing can go into the prefix.
        prefix = ""
        rest = name
    else:
        prefix = head[:cut]
        rest = name[cut + 1:]

    if not prefix or len(rest) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, rest
1080
@staticmethod
def _create_header(info, format):
    """Return a single header block built from the info dictionary.

    info maps field names to values; missing fields fall back to an
    empty string, zero, or REGTYPE for the type. format must be one
    of the *_FORMAT constants and is handed to itn() for the numeric
    fields.

    The field widths below add up to the layout that frombuf() reads
    back: the checksum occupies bytes 148-155 and the type flag byte
    156.
    """
    parts = [
        stn(info.get("name", ""), 100),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum placeholder. It has to be exactly 8 bytes wide:
        # a shorter placeholder would shift every following field
        # and make the checksum splice below hit the wrong bytes.
        " " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100),
        stn(info.get("magic", ""), 8),
        stn(info.get("uname", ""), 32),
        stn(info.get("gname", ""), 32),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155)
    ]

    # struct.pack() pads the joined fields with NULs up to BLOCKSIZE.
    buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first 7 bytes of the checksum field (6 octal
    # digits plus a NUL); the 8th byte keeps its space.
    buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
    return buf
1108
@staticmethod
def _create_payload(payload):
    """Pad payload with NUL bytes to a multiple of BLOCKSIZE.

    A payload that already ends on a block boundary is returned
    unchanged.
    """
    overhang = len(payload) % BLOCKSIZE
    if overhang:
        payload += NUL * (BLOCKSIZE - overhang)
    return payload
1118
@classmethod
def _create_gnu_long_header(cls, name, type):
    """Build a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for name.

    The result is a header block named "././@LongLink" followed by
    the NUL-terminated name, padded to full blocks.
    """
    name += NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(name),
        "magic": GNU_MAGIC,
    }

    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(name)
1135
@classmethod
def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
    """Build a pax extended or global header sequence.

    pax_headers maps keywords to values; both are encoded as UTF-8
    here, so they must be unicode objects. type selects the header
    type flag written to the header block.
    """
    chunks = []
    for keyword, value in pax_headers.items():
        keyword = keyword.encode("utf8")
        value = value.encode("utf8")

        # Each record is "<length> <keyword>=<value>\n", where length
        # counts the whole record including its own digits. Iterate
        # until the length is consistent with its own digit count.
        base = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate
        chunks.append("%d %s=%s\n" % (total, keyword, value))
    records = "".join(chunks)

    # The member name is the hardcoded "././@PaxHeader" (as star
    # uses) rather than the name recommended by POSIX.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Header block first, then the records padded to full blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(records)
1167
@classmethod
def frombuf(cls, buf):
    """Create a TarInfo object from one header block.

    buf must be exactly BLOCKSIZE bytes long. HeaderError is raised
    for a truncated block, a block consisting only of NULs, or a
    checksum mismatch.
    """
    if len(buf) != BLOCKSIZE:
        raise HeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise HeaderError("empty header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise HeaderError("bad checksum")

    member = cls()
    member.buf = buf
    member.chksum = chksum
    member.type = buf[156:157]

    # (attribute, converter, start, stop) for each parsed field, in
    # the order the fields are decoded.
    layout = (
        ("name", nts, 0, 100),
        ("mode", nti, 100, 108),
        ("uid", nti, 108, 116),
        ("gid", nti, 116, 124),
        ("size", nti, 124, 136),
        ("mtime", nti, 136, 148),
        ("linkname", nts, 157, 257),
        ("uname", nts, 265, 297),
        ("gname", nts, 297, 329),
        ("devmajor", nti, 329, 337),
        ("devminor", nti, 337, 345),
    )
    for attr, convert, start, stop in layout:
        setattr(member, attr, convert(buf[start:stop]))
    prefix = nts(buf[345:500])

    # The old V7 format marks a directory as a regular file whose
    # name ends in a slash.
    if member.type == AREGTYPE and member.name.endswith("/"):
        member.type = DIRTYPE

    # Directory names are kept without trailing slashes.
    if member.isdir():
        member.name = member.name.rstrip("/")

    # Re-join a ustar long name from its prefix and name fields.
    if prefix and member.type not in GNU_TYPES:
        member.name = prefix + "/" + member.name
    return member
1211
@classmethod
def fromtarfile(cls, tarfile):
    """Read the next member header from tarfile and return the
    resulting TarInfo object.

    Returns None when no more data can be read from tarfile.fileobj.
    """
    header = tarfile.fileobj.read(BLOCKSIZE)
    if header:
        member = cls.frombuf(header)
        # The header that was just read starts one block back.
        member.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return member._proc_member(tarfile)
    return None
Thomas Wouters89f507f2006-12-13 04:49:30 +00001223
Guido van Rossumd8faa362007-04-27 19:54:29 +00001224 #--------------------------------------------------------------------------
1225 # The following are methods that are called depending on the type of a
1226 # member. The entry point is _proc_member() which can be overridden in a
1227 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1228 # implement the following
1229 # operations:
1230 # 1. Set self.offset_data to the position where the data blocks begin,
1231 # if there is data that follows.
1232 # 2. Set tarfile.offset to the position where the next member's header will
1233 # begin.
1234 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Dispatch to the _proc_*() method that handles this member's
    type and return its result.
    """
    member_type = self.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if member_type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if member_type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001247
def _proc_builtin(self, tarfile):
    """Process a builtin member type, or an unknown type that is
    treated like a regular file.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    # Only regular files and unsupported types are followed by data
    # blocks that have to be skipped.
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        tarfile.offset = data_start + self._block(self.size)
    else:
        tarfile.offset = data_start

    # Apply the extended header information saved on the archive.
    # NOTE(review): both statements are taken to be inside the
    # PAX_FIELDS check -- confirm against the original indentation.
    for keyword, value in tarfile.pax_headers.items():
        if keyword not in PAX_FIELDS:
            continue
        setattr(self, keyword, value)
        self.pax_headers[keyword] = value

    return self
1267
def _proc_gnulong(self, tarfile):
    """Process a GNU longname or longlink member.

    The long value is read from the data blocks, then the following
    header is processed and patched with it.
    """
    long_value = tarfile.fileobj.read(self._block(self.size))

    # The member that the long name/link belongs to comes next.
    header = tarfile.fileobj.read(BLOCKSIZE)
    follower = self.frombuf(header)
    follower.offset = self.offset
    member = follower._proc_member(tarfile)

    # The returned member starts where this long header started.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = long_value.rstrip(NUL)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = long_value.rstrip(NUL)

    return member
1289
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header and any extension headers that
    follow it.

    Builds the list of data and hole sections in self.sparse and
    replaces self.size with the original file size read from the
    header.
    """
    header = self.buf
    sections = _ringbuffer()
    lastpos = 0     # end of the last section in the original file
    realpos = 0     # position of the section's data in the archive

    # The first header holds up to 4 sparse structs of 24 bytes
    # each, starting at byte 386.
    for pos in xrange(386, 386 + 4 * 24, 24):
        try:
            offset = nti(header[pos:pos + 12])
            numbytes = nti(header[pos + 12:pos + 24])
        except ValueError:
            break
        if offset > lastpos:
            sections.append(_hole(lastpos, offset - lastpos))
        sections.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes

    isextended = ord(header[482])
    origsize = nti(header[483:495])

    # While the isextended flag is set, another block with up to 21
    # sparse structs follows.
    while isextended == 1:
        block = tarfile.fileobj.read(BLOCKSIZE)
        for pos in xrange(0, 21 * 24, 24):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sections.append(_hole(lastpos, offset - lastpos))
            sections.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
        isextended = ord(block[504])

    # A trailing hole fills the gap up to the original size.
    if lastpos < origsize:
        sections.append(_hole(lastpos, origsize - lastpos))

    self.sparse = sections

    self.offset_data = tarfile.fileobj.tell()
    # self.size is still the stored size at this point.
    tarfile.offset = self.offset_data + self._block(self.size)
    self.size = origsize

    return self
1345
def _proc_pax(self, tarfile):
    """Process a pax extended or global header (POSIX.1-2001).

    The records are parsed into a dictionary, the following member
    is read, and for an extended header that member is patched with
    the parsed values. Returns the following member, or None if
    there is none.
    """
    raw = tarfile.fileobj.read(self._block(self.size))

    # A global header updates the archive-wide dictionary in place;
    # an extended header works on a private copy that only affects
    # the next member.
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Keywords whose values are numbers; everything else is treated
    # as a UTF-8 string.
    converters = {
        "atime": float,
        "ctime": float,
        "mtime": float,
        "uid": int,
        "gid": int,
        "size": int
    }

    # A record has the form "%d %s=%s\n" % (length, keyword, value),
    # where length covers the whole record including the length
    # field itself and the newline.
    record_re = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    match = record_re.match(raw, pos)
    while match:
        length = int(match.group(1))
        keyword = match.group(2)
        value = raw[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8").encode(tarfile.encoding)
        value = value.decode("utf8")

        convert = converters.get(keyword)
        if convert is None:
            value = value.encode(tarfile.encoding)
        else:
            # An unparsable number is stored as 0.
            try:
                value = convert(value)
            except ValueError:
                value = 0

        pax_headers[keyword] = value
        pos += length
        match = record_re.match(raw, pos)

    # Read the member that follows this header.
    member = self.fromtarfile(tarfile)

    if self.type != XGLTYPE and member is not None:
        # Extended header: patch the following member with the
        # parsed values.
        for keyword, value in pax_headers.items():
            if keyword in PAX_FIELDS:
                setattr(member, keyword, value)
                pax_headers[keyword] = value
        member.pax_headers = pax_headers.copy()

    return member
1417
def _block(self, count):
    """Round count up to the next multiple of BLOCKSIZE,
    e.g. _block(834) => 1024.
    """
    full_blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partial block still occupies a whole block.
        full_blocks += 1
    return full_blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001426
def isreg(self):
    """Return True if the member type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES

def isfile(self):
    """Same as isreg()."""
    return self.isreg()

def isdir(self):
    """Return True if the member type is DIRTYPE."""
    return self.type == DIRTYPE

def issym(self):
    """Return True if the member type is SYMTYPE."""
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the member type is LNKTYPE."""
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the member type is CHRTYPE."""
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the member type is BLKTYPE."""
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the member type is FIFOTYPE."""
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if the member type is GNUTYPE_SPARSE."""
    return self.type == GNUTYPE_SPARSE

def isdev(self):
    """Return True if the member type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1447# class TarInfo
1448
1449class TarFile(object):
1450 """The TarFile Class provides an interface to tar archives.
1451 """
1452
# Class-level defaults; __init__() overrides them per instance when
# the matching argument is given.

# Debug message level, from 0 (no messages) to 3 (all messages).
debug = 0

# If true, add the content of a linked file to the tar file instead
# of the link itself.
dereference = False

# If true, skip empty or invalid blocks and continue processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (if debug >= 0).
# If > 0, errors are passed to the caller as exceptions.
errorlevel = 0

# The format to use when creating an archive.
format = DEFAULT_FORMAT

# Strings from POSIX.1-2001 (UTF-8) headers are transferred to this
# encoding.
encoding = ENCODING

# The default TarInfo class to use.
tarinfo = TarInfo

# The default ExFileObject class to use.
fileobject = ExFileObject
1473
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
    read from an existing archive, 'a' to append data to an existing
    file or 'w' to create a new file overwriting an existing one. `mode'
    defaults to 'r'.
    If `fileobj' is given, it is used for reading or writing data. If it
    can be determined, `mode' is overridden by `fileobj's mode.
    `fileobj' is not closed, when TarFile is closed.

    `name' may be omitted when `fileobj' is given; if `fileobj' has no
    name attribute either, self.name is set to None.
    The remaining arguments override the class-level defaults of the
    same name when they are not None. `pax_headers', if given and the
    archive is opened for writing or appending, is written as a pax
    global header.

    Raises ValueError for an invalid `mode'. If setting up the archive
    fails, a file that was opened here is closed again before the
    exception propagates.
    """
    file_modes = {"r": "rb", "a": "r+b", "w": "wb"}
    # Checking against the mapping also rejects the empty string,
    # which a substring test against "raw" would let through.
    if mode not in file_modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = file_modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = _open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True

    # A file object without a name (e.g. an in-memory buffer) leaves
    # name as None, which os.path.abspath() cannot handle.
    if name is not None:
        self.name = os.path.abspath(name)
    else:
        self.name = None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = 0         # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added
    self.pax_headers = {}   # save contents of global pax headers

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                if self.next() is None:
                    if self.offset > 0:
                        self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self.mode in "aw":
            self._loaded = True

            if pax_headers:
                buf = self.tarinfo.create_pax_global_header(
                        pax_headers.copy(), self.encoding)
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # The caller never gets this object, so nobody else could
        # close a file that was opened above. Re-raise unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
1553
def _getposix(self):
    """Return True if the archive format is USTAR_FORMAT."""
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Deprecated setter: a true value selects USTAR_FORMAT, a false
    value GNU_FORMAT. Always emits a DeprecationWarning.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning)
    self.format = USTAR_FORMAT if value else GNU_FORMAT

posix = property(_getposix, _setposix)
1564
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001565 #--------------------------------------------------------------------------
1566 # Below are the classmethods which act as alternate constructors to the
1567 # TarFile class. The open() method is the only one that is needed for
1568 # public use; it is the "super"-constructor and is able to select an
1569 # adequate "sub"-constructor for a particular compression using the mapping
1570 # from OPEN_METH.
1571 #
1572 # This concept allows one to subclass TarFile without losing the comfort of
1573 # the super-constructor. A sub-constructor is registered and made available
1574 # by adding it to the mapping in OPEN_METH.
1575
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
    an appropriate TarFile object.

    mode:
    'r' or 'r:*' open for reading with transparent compression
    'r:'         open for reading exclusively uncompressed
    'r:gz'       open for reading with gzip compression
    'r:bz2'      open for reading with bzip2 compression
    'a' or 'a:'  open for appending, creating the file if necessary
    'w' or 'w:'  open for writing without compression
    'w:gz'       open for writing with gzip compression
    'w:bz2'      open for writing with bzip2 compression

    'r|*'        open a stream of tar blocks with transparent compression
    'r|'         open an uncompressed stream of tar blocks for reading
    'r|gz'       open a gzip compressed stream of tar blocks
    'r|bz2'      open a bzip2 compressed stream of tar blocks
    'w|'         open an uncompressed stream for writing
    'w|gz'       open a gzip compressed stream for writing
    'w|bz2'      open a bzip2 compressed stream for writing

    Additional keyword arguments are passed on to the constructor
    that ends up being used.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Transparent mode: try every registered *open() method until
        # one of them accepts the file.
        for comptype in cls.OPEN_METH:
            opener = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next candidate starts at the same
                # position.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Pick the *open() method registered for this compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        archive = cls(name, filemode, stream, **kwargs)
        # The stream was created here, so the archive closes it.
        archive._extfileobj = False
        return archive

    if mode in "aw":
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001648
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive name for reading, appending
    or writing.

    mode must be 'r', 'a' or 'w'; otherwise ValueError is raised.
    """
    mode_ok = len(mode) <= 1 and mode in "raw"
    if not mode_ok:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001656
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
    Appending is not allowed.

    Raises ValueError for an invalid mode, CompressionError if the
    gzip module is not usable, and ReadError if opening fails with
    an IOError. A file that was opened here is closed again when
    opening the archive fails.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the raw file is ours, so that it can be closed
    # on failure without touching a caller-supplied file object.
    opened_here = fileobj is None
    if opened_here:
        fileobj = _open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure (e.g. a ReadError from parsing the first
        # header) must not leak the file either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1682
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
    Appending is not allowed.

    Raises ValueError for an invalid mode, CompressionError if the
    bz2 module is missing, and ReadError if opening fails with an
    IOError. A BZ2File that was opened here is closed again when
    opening the archive fails.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here is closed on failure; a
    # caller-supplied file object is merely wrapped and left alone.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure must not leak the file either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1707
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001708 # All *open() methods are registered here.
# Maps a compression type to the name of the classmethod that opens it.
OPEN_METH = dict(
    tar="taropen",      # uncompressed tar
    gz="gzopen",        # gzip compressed tar
    bz2="bz2open",      # bzip2 compressed tar
)
1714
1715 #--------------------------------------------------------------------------
1716 # The public methods which TarFile provides:
1717
def close(self):
    """Close the TarFile.

    In write or append mode, two zero blocks are appended to the archive
    and the output is padded with NULs up to a multiple of RECORDSIZE.
    Calling close() on an already closed TarFile does nothing.

    The underlying file object is closed (unless it was supplied
    externally) even if writing the trailer fails, and the TarFile is
    marked as closed in either case.
    """
    if self.closed:
        return

    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file handle no matter what happened above.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1737
def getmember(self, name):
    """Look up the archive member called `name' and return its TarInfo.

    The lookup is delegated to _getmember(). KeyError is raised when it
    reports no match.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001748
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in
    archive order.
    """
    self._check()
    # A complete listing needs one full scan of the archive; do it now
    # unless it already happened.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1758
def getnames(self):
    """Return the names of all archive members as a list, in the same
    order as getmembers() returns them.
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001764
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing a file on disk.

    The file is identified either by its path `name' or by the open file
    object `fileobj'; in the latter case fileobj.name is used as the
    path and os.fstat() on its descriptor supplies the metadata.
    `arcname', if given, is the name stored in the archive instead of
    the path.

    Returns the populated TarInfo, or None if the file is of a type that
    none of the stat.S_IS*() tests below recognises. The result may be
    modified before it is passed to addfile().
    """
    self._check("aw")

    # An open file object wins: its real name replaces `name'.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalise it, drop any drive prefix and
    # strip leading slashes so that the stored path is relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    arcname = arcname.lstrip("/")

    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Pick the stat flavour: fstat for file objects, lstat when
    # symlinks must not be followed (and the platform has it), else stat.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference
                            and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # A hardlink to a file that is in the archive already.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            member_type = REGTYPE
            # Only remember valid inodes (on win32 it is always 0).
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
        if not arcname.endswith("/"):
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything we learned into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data.
    tarinfo.size = statres.st_size if stat.S_ISREG(stmd) else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname

    # Symbolic owner names are optional; unknown ids are simply skipped.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    is_device = member_type in (CHRTYPE, BLKTYPE)
    if is_device and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1865
def list(self, verbose=True):
    """Print a table of contents to sys.stdout.

    With `verbose' false only the member names are printed, one per
    line. With `verbose' true each line also shows mode, owner/group,
    size (or device numbers), modification time and link target, similar
    to `ls -l'.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            print(filemode(tarinfo.mode), end=' ')
            owner = tarinfo.uname or tarinfo.uid
            group = tarinfo.gname or tarinfo.gid
            print("%s/%s" % (owner, group), end=' ')
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices have no size; show major,minor instead.
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % tarinfo.size, end=' ')
            stamp = time.localtime(tarinfo.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        suffix = "/" if tarinfo.isdir() else ""
        print(tarinfo.name + suffix, end=' ')

        if verbose:
            if tarinfo.issym():
                print("->", tarinfo.linkname, end=' ')
            if tarinfo.islnk():
                print("link to", tarinfo.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001894
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive.

    `name' may be any type of file (directory, fifo, symbolic link,
    etc.). `arcname', if given, is the name stored in the archive
    instead of `name'. Directories are added recursively unless
    `recursive' is false. The archive file itself and files of an
    unsupported type are skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = _open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Never leak the source handle, even if writing fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1945
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

    A copy of `tarinfo' is serialised with tobuf() and written as the
    header. If `fileobj' is given, tarinfo.size bytes are copied from it
    and padded with NULs to a whole number of blocks. The copy is then
    appended to the member list. On Windows, `fileobj' should be opened
    in mode 'rb' so that the size matches the bytes actually read.
    """
    self._check("aw")

    # Work on a copy so the caller's object is left alone.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding)
    self.fileobj.write(header)
    self.offset += len(header)

    if fileobj is not None:
        # Payload follows the header, rounded up to a full block.
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001971
def extractall(self, path=".", members=None):
    """Extract all members from the archive into the directory `path'
    (the current working directory by default).

    `members' is optional and must be a subset of the list returned by
    getmembers(). Directories are first created with a safe mode so
    that everything below them can be written; their owner,
    modification time and permissions are set afterwards.

    An ExtractError raised while fixing up a directory is re-raised if
    self.errorlevel > 1 and reported through _dbg() otherwise.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), 0o700)
            except EnvironmentError:
                # Best effort: the directory may exist already.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories, so that children are handled before
    # their parents.
    directories.sort(key=lambda d: d.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable: rebinding `path' here would make
        # every following directory resolve below the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2012
def extract(self, member, path=""):
    """Extract a member from the archive to the current working
    directory, using its full name.

    `member' may be a filename or a TarInfo object. `path' selects a
    different target directory. The member's file information is
    restored as accurately as possible.

    An EnvironmentError is re-raised if self.errorlevel > 0, an
    ExtractError if self.errorlevel > 1; otherwise the error is only
    reported through _dbg().
    """
    self._check("r")

    # Names are resolved to their TarInfo; anything else is assumed to
    # be a TarInfo already.
    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        else:
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2045
def extractfile(self, member):
    """Extract a member from the archive as a file object.

    `member' may be a filename or a TarInfo object. For a regular file,
    or a member of unknown type, a file-like object is returned. For a
    (sym)link the file object of the link's target is returned. For
    members without data (directory, device, fifo, ...) None is
    returned.

    Raises StreamError when a link is requested from a non-seekable
    stream and KeyError when a link's target is not in the archive.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    if tarinfo.isreg():
        return self.fileobject(self, tarinfo)

    elif tarinfo.type not in SUPPORTED_TYPES:
        # If a member's type is unknown, it is treated as a
        # regular file.
        return self.fileobject(self, tarinfo)

    elif tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")

        # A (sym)link's file object is its target's file object. The
        # target is searched among the members preceding the link.
        target = self._getmember(tarinfo.linkname, tarinfo)
        if target is None:
            # Fail with a clear error rather than recursing with None.
            raise KeyError("linkname %r not found" % tarinfo.linkname)
        return self.extractfile(target)
    else:
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None
2084
def _extract_member(self, tarinfo, targetpath):
    """Materialise the TarInfo object `tarinfo' as the physical file
    `targetpath'.

    Missing parent directories are created first. The member is then
    built by the make*() method that matches its type, after which
    owner, permissions and modification time are applied (permissions
    and time are skipped for symbolic links).
    """
    # Drop a single trailing slash and convert to the platform's
    # path conventions.
    if targetpath.endswith("/"):
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Make sure the directory that will hold the member exists.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        note = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        note = tarinfo.name
    self._dbg(1, note)

    # Select the builder for this member type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if tarinfo.issym():
        return
    self.chmod(tarinfo, targetpath)
    self.utime(tarinfo, targetpath)
2125
2126 #--------------------------------------------------------------------------
2127 # Below are the different file methods. They are called via
2128 # _extract_member() when extract() is called. They can be replaced in a
2129 # subclass to implement other functionality.
2130
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing path is
    not treated as an error; any other failure is re-raised.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError as e:
        if e.errno == errno.EEXIST:
            return
        raise
2139
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to the file `targetpath'.

    The data is read through extractfile(). Both the source and the
    target file are closed even if opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = _open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2148
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
    regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, note % tarinfo.type)
2156
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. Raises ExtractError on platforms
    where os.mkfifo is missing.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002164
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'.

    The node type follows tarinfo.isblk(); its device number is built
    from tarinfo.devmajor and tarinfo.devminor. Raises ExtractError when
    os.mknod or os.makedev is not available.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the stored mode with the matching file-type bit.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2179
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at `targetpath'.

    If the link call fails with AttributeError (for example because the
    platform lacks os.symlink/os.link), a copy of the referenced file is
    made instead: first by extracting the referenced member from the
    archive, then by copying the file from disk. IOError is raised when
    that copy fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # Resolve the link name relative to the member's directory.
            member_dir = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(member_dir, linkpath))

        try:
            referenced = self.getmember(linkpath)
            self._extract_member(referenced, targetpath)
        except (EnvironmentError, KeyError):
            # Last resort: copy the file from the filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002206
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

    Nothing happens unless the pwd module is available and the
    effective uid is 0. Group and user are looked up by name first, then
    by numeric id, and finally fall back to the current process's ids.
    Raises ExtractError if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not what it points to.
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002234
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in `tarinfo' to `targetpath'.
    Does nothing if os.chmod is missing; raises ExtractError if the
    call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002243
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
    tarinfo.mtime.

    Skipped when os.utime is missing and for directories on win32,
    where utime() on a directory is an error (EACCES) according to
    msdn.microsoft.com. Raises ExtractError if the call fails.
    """
    unsupported = not hasattr(os, 'utime')
    if unsupported or (sys.platform == "win32" and tarinfo.isdir()):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002257
2258 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, or
    None when there is no further member.

    A member parked in self.firstmember is handed out first. Otherwise
    the next header is parsed at self.offset. On a HeaderError the
    block is skipped if self.ignore_zeros is set; if not, ReadError is
    raised when the error occurs at offset 0 and None is returned for
    any later offset.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except HeaderError as e:
            if not self.ignore_zeros:
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
            # Skip the bad block and try the next one.
            self._dbg(2, "0x%X: %s" % (self.offset, e))
            self.offset += BLOCKSIZE
            continue

        if tarinfo is None:
            return
        self.members.append(tarinfo)
        return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002291
2292 #--------------------------------------------------------------------------
2293 # Little helper methods:
2294
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name, searching from bottom to top.

    If `tarinfo' is given, only the members that precede it in the
    archive are searched. Returns the matching TarInfo, or None if
    there is no match.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        end = len(members)
    else:
        end = members.index(tarinfo)

    # Walk backwards so that the last occurrence of a name wins.
    for member in reversed(members[:end]):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002310
def _load(self):
    """Scan the whole archive by calling next() until it returns None,
    then mark the TarFile as fully loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2320
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when `mode' is given,
    if the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002329
def __iter__(self):
    """Return an iterator over the members: a plain list iterator once
    the archive is fully loaded, a TarIter otherwise.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2337
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    if self.debug >= level:
        print(msg, file=sys.stderr)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002343# class TarFile
2344
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator bound to `tarfile', starting at the first
           member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def __next__(self):
        """Return the next member.

           While the TarFile is not fully loaded, members are obtained
           from its next() method; when that is exhausted the TarFile is
           flagged as _loaded. If the TarFile is already loaded (see SF
           #1100429: getmembers() may be called during iteration),
           members are taken from its list by index, so that iteration
           does not stop prematurely.
        """
        archive = self.tarfile
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2380
2381# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a span of `size' bytes starting
       at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True for offsets in the half-open range [offset, offset + size).
        start = self.offset
        return start <= offset and offset < start + self.size
2390
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is stored
       next to the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2397
class _hole(_section):
    """A section of a sparse file that is a hole. Adds nothing to
       _section.
    """
2402
class _ringbuffer(list):
    """List that remembers the index of the last successful lookup, so
       that the next search starts there instead of at the beginning.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching circularly from the last
           hit, for which `offset in item' is true. Return None if no
           item matches or the buffer is empty.
        """
        if not self:
            # Nothing to search; indexing below would raise IndexError.
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2423
2424#---------------------------------------------
2425# zipfile compatible TarFile class
2426#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' as a plain (TAR_PLAIN) or gzip compressed
           (TAR_GZIPPED) tar archive. Any other `compression' value
           raises ValueError. In read mode every member additionally
           gets the attributes filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            members = self.tarfile.getmembers()
            for m in members:
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        """Return the names of the members in infolist() as a list.
        """
        return [m.name for m in self.infolist()]
    def infolist(self):
        """Return a list of the members whose type is in REGULAR_TYPES.
        """
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]
    def printdir(self):
        """Print a table of contents via TarFile.list().
        """
        self.tarfile.list()
    def testzip(self):
        """Do nothing and return None.
        """
        return
    def getinfo(self, name):
        """Return the member called `name'.
        """
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the complete data of the member called `name'.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive, optionally as
           `arcname'. `compress_type' is accepted but ignored.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add the data `bytes' to the archive, described by `zinfo'.
           name, size and mtime are derived from zinfo's filename,
           file_size and date_time attributes.
        """
        # Imported locally: the archive payload is binary, so an
        # in-memory bytes buffer is used as the source file object.
        from io import BytesIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, BytesIO(bytes))
    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
#class TarFileCompat
2474
2475#--------------------
2476# exported functions
2477#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # `open' is the module-level alias of TarFile.open bound below,
    # not the builtin.
    try:
        open(name).close()
    except TarError:
        return False
    return True

open = TarFile.open