blob: 2f05618bed4e7b516ef7f1d18faed6c7be820480 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Guido van Rossumd8faa362007-04-27 19:54:29 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000052import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000053import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    #
    # The exception is raised with call syntax: the old
    # ``raise Class, "message"`` statement form is a SyntaxError on Python 3.
    raise ImportError("tarfile does not work for platform==mac")
61
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000062try:
63 import grp, pwd
64except ImportError:
65 grp = pwd = None
66
67# from tarfile import *
68__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69
Guido van Rossum8f78fe92006-08-24 04:03:53 +000070from __builtin__ import open as _open # Since 'open' is TarFile.open
71
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000072#---------------------------------------------------------
73# tar constants
74#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# The magic and version fields of a header together occupy 8 bytes.
# GNU tar fills them with "ustar" followed by two spaces and a NUL,
# POSIX with "ustar", a NUL and the version "00".
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000107
108#---------------------------------------------------------
109# tarfile constants
110#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that will be treated as a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# File types that are part of the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to its converter.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
140
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000141#---------------------------------------------------------
142# Bits used in the mode field, values in octal.
143#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special permission bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Owner / group / other permission bits.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164
165#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000166# initialization
167#---------------------------------------------------------
# Default encoding for names: the filesystem encoding when the
# interpreter reports one, the interpreter default otherwise.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
172#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000173# Some useful functions
174#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000175
def stn(s, length, encoding, errors):
    """Encode the string s and return it as a bytes field of exactly
       length bytes: longer values are truncated, shorter ones are
       padded on the right with null bytes.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, b"\0")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000181
def nts(s, encoding, errors):
    """Decode a bytes field to a string, ignoring everything from the
       first null byte onwards (the whole field if it has none).
    """
    head = s.partition(b"\0")[0]
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000189
def nti(s):
    """Convert a number field (a bytes object) to a python number.

       Raises HeaderError if the field is not in the GNU base-256
       encoding and does not hold a valid octal number either.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    #
    # Compare a one-byte slice rather than s[0]: indexing a bytes object
    # yields an int, which never equals a one-character string, so the
    # base-256 branch could not be reached otherwise.
    if s[0:1] != b"\x80":
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            # Also covers UnicodeDecodeError (a ValueError subclass)
            # raised for non-ascii bytes in the field.
            raise HeaderError("invalid header")
    else:
        # GNU extension: the bytes after the 0o200 marker are a
        # big-endian binary number.
        n = 0
        for byte in bytearray(s[1:]):
            n = (n << 8) | byte
    return n
206
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

       Returns a bytes object.  Raises ValueError if n does not fit into
       the octal representation and format is not GNU_FORMAT, or if it
       does not fit into the GNU base-256 representation either.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        # Format as text first and encode explicitly; bytes() cannot be
        # built from a str without an encoding.
        return ("%0*o" % (digits - 1, n)).encode("ascii") + NUL

    if format != GNU_FORMAT or n >= 256 ** (digits - 1):
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # struct may raise an error if n does not fit a C long.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # bytes objects are immutable, so the field is assembled in a
    # bytearray, least significant byte first.
    field = bytearray()
    for _ in range(digits - 1):
        field.insert(0, n & 0o377)
        n >>= 8
    field.insert(0, 0o200)
    return bytes(field)
233
def calc_chksums(buf):
    """Return the (unsigned, signed) checksum pair for a member's header.

       All bytes of the 512 byte header are summed up, except for the
       chksum field (bytes 148-155) which is treated as if it was filled
       with spaces. According to the GNU tar sources, some tars (Sun and
       NeXT) calculate chksum with signed char, which gives a different
       result if bytes with the high bit set are present, so both
       variants are calculated.
    """
    header = buf[:512]
    # 8 spaces in place of the skipped chksum field: 8 * 0x20 == 256.
    spaces = 256
    unsigned_chksum = spaces + sum(struct.unpack("148B8x356B", header))
    signed_chksum = spaces + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000246
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content. Raises IOError
       if src is exhausted before length bytes have been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    # Sizes of the individual reads: the full chunks first, then the
    # rest if there is any.
    wanted = [BUFSIZE] * full_chunks
    if tail != 0:
        wanted.append(tail)

    for count in wanted:
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
271
# One group per character of the mode string built by filemode().
# Within a group the first (bits, char) pair whose bits are all set
# in the mode decides the character.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000298
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for group in filemode_table:
        # The first entry of the group whose bits are all set in mode
        # wins; "-" is used if none of them matches.
        matches = (char for bit, char in group if mode & bit == bit)
        chars.append(next(matches, "-"))
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000313
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
318
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000337
338#---------------------------
339# internal stream interface
340#---------------------------
341class _LowLevelFile:
342 """Low-level file object. Supports reading and writing.
343 It is used instead of a regular file object for streaming
344 access.
345 """
346
347 def __init__(self, name, mode):
348 mode = {
349 "r": os.O_RDONLY,
350 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
351 }[mode]
352 if hasattr(os, "O_BINARY"):
353 mode |= os.O_BINARY
354 self.fd = os.open(name, mode)
355
356 def close(self):
357 os.close(self.fd)
358
359 def read(self, size):
360 return os.read(self.fd, size)
361
362 def write(self, s):
363 os.write(self.fd, s)
364
365class _Stream:
366 """Class that serves as an adapter between TarFile and
367 a stream-like object. The stream-like object only
368 needs to have a read() or write() method and is accessed
369 blockwise. Use of gzip or bzip2 compression is possible.
370 A stream-like object could be for example: sys.stdin,
371 sys.stdout, a socket, a tape device etc.
372
373 _Stream is intended to be used only internally.
374 """
375
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000376 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000377 """Construct a _Stream object.
378 """
379 self._extfileobj = True
380 if fileobj is None:
381 fileobj = _LowLevelFile(name, mode)
382 self._extfileobj = False
383
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000384 if comptype == '*':
385 # Enable transparent compression detection for the
386 # stream interface
387 fileobj = _StreamProxy(fileobj)
388 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000389
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 self.name = name or ""
391 self.mode = mode
392 self.comptype = comptype
393 self.fileobj = fileobj
394 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000395 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000396 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000397 self.closed = False
398
399 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000400 try:
401 import zlib
402 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000403 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000404 self.zlib = zlib
405 self.crc = zlib.crc32("")
406 if mode == "r":
407 self._init_read_gz()
408 else:
409 self._init_write_gz()
410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000412 try:
413 import bz2
414 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000415 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000416 if mode == "r":
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000417 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000418 self.cmp = bz2.BZ2Decompressor()
419 else:
420 self.cmp = bz2.BZ2Compressor()
421
422 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000423 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000424 self.close()
425
426 def _init_write_gz(self):
427 """Initialize for writing with gzip compression.
428 """
429 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
430 -self.zlib.MAX_WBITS,
431 self.zlib.DEF_MEM_LEVEL,
432 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000433 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000434 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000435 if self.name.endswith(".gz"):
436 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000437 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
438 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000439
440 def write(self, s):
441 """Write string s to the stream.
442 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000443 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000444 self.crc = self.zlib.crc32(s, self.crc)
445 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000446 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000447 s = self.cmp.compress(s)
448 self.__write(s)
449
450 def __write(self, s):
451 """Write string s to the stream if a whole new block
452 is ready to be written.
453 """
454 self.buf += s
455 while len(self.buf) > self.bufsize:
456 self.fileobj.write(self.buf[:self.bufsize])
457 self.buf = self.buf[self.bufsize:]
458
459 def close(self):
460 """Close the _Stream object. No operation should be
461 done on it afterwards.
462 """
463 if self.closed:
464 return
465
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000466 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000467 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000468
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000469 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000470 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000471 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000472 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473 # The native zlib crc is an unsigned 32-bit integer, but
474 # the Python wrapper implicitly casts that to a signed C
475 # long. So, on a 32-bit box self.crc may "look negative",
476 # while the same crc on a 64-bit box may "look positive".
477 # To avoid irksome warnings from the `struct` module, force
478 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000479 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
480 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000481
482 if not self._extfileobj:
483 self.fileobj.close()
484
485 self.closed = True
486
487 def _init_read_gz(self):
488 """Initialize for reading a gzip compressed fileobj.
489 """
490 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000491 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000492
493 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000494 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000495 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000496 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000497 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000498
499 flag = ord(self.__read(1))
500 self.__read(6)
501
502 if flag & 4:
503 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
504 self.read(xlen)
505 if flag & 8:
506 while True:
507 s = self.__read(1)
508 if not s or s == NUL:
509 break
510 if flag & 16:
511 while True:
512 s = self.__read(1)
513 if not s or s == NUL:
514 break
515 if flag & 2:
516 self.__read(2)
517
518 def tell(self):
519 """Return the stream's file pointer position.
520 """
521 return self.pos
522
523 def seek(self, pos=0):
524 """Set the stream's file pointer to pos. Negative seeking
525 is forbidden.
526 """
527 if pos - self.pos >= 0:
528 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000529 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000530 self.read(self.bufsize)
531 self.read(remainder)
532 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000533 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000534 return self.pos
535
536 def read(self, size=None):
537 """Return the next size number of bytes from the stream.
538 If size is not defined, return all bytes of the stream
539 up to EOF.
540 """
541 if size is None:
542 t = []
543 while True:
544 buf = self._read(self.bufsize)
545 if not buf:
546 break
547 t.append(buf)
548 buf = "".join(t)
549 else:
550 buf = self._read(size)
551 self.pos += len(buf)
552 return buf
553
554 def _read(self, size):
555 """Return size bytes from the stream.
556 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000557 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000558 return self.__read(size)
559
560 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000561 while c < size:
562 buf = self.__read(self.bufsize)
563 if not buf:
564 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000565 try:
566 buf = self.cmp.decompress(buf)
567 except IOError:
568 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000569 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000570 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000571 buf = self.dbuf[:size]
572 self.dbuf = self.dbuf[size:]
573 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000574
575 def __read(self, size):
576 """Return size bytes from stream. If internal buffer is empty,
577 read another block from the stream.
578 """
579 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000580 while c < size:
581 buf = self.fileobj.read(self.bufsize)
582 if not buf:
583 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000584 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000585 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000586 buf = self.buf[:size]
587 self.buf = self.buf[size:]
588 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000589# class _Stream
590
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000591class _StreamProxy(object):
592 """Small proxy class that enables transparent compression
593 detection for the Stream interface (mode 'r|*').
594 """
595
596 def __init__(self, fileobj):
597 self.fileobj = fileobj
598 self.buf = self.fileobj.read(BLOCKSIZE)
599
600 def read(self, size):
601 self.read = self.fileobj.read
602 return self.buf
603
604 def getcomptype(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000605 if self.buf.startswith(b"\037\213\010"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000606 return "gz"
607 if self.buf.startswith("BZh91"):
608 return "bz2"
609 return "tar"
610
611 def close(self):
612 self.fileobj.close()
613# class StreamProxy
614
Thomas Wouters477c8d52006-05-27 19:21:47 +0000615class _BZ2Proxy(object):
616 """Small proxy class that enables external file object
617 support for "r:bz2" and "w:bz2" modes. This is actually
618 a workaround for a limitation in bz2 module's BZ2File
619 class which (unlike gzip.GzipFile) has no support for
620 a file object argument.
621 """
622
623 blocksize = 16 * 1024
624
625 def __init__(self, fileobj, mode):
626 self.fileobj = fileobj
627 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000628 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000629 self.init()
630
631 def init(self):
632 import bz2
633 self.pos = 0
634 if self.mode == "r":
635 self.bz2obj = bz2.BZ2Decompressor()
636 self.fileobj.seek(0)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000637 self.buf = b""
Thomas Wouters477c8d52006-05-27 19:21:47 +0000638 else:
639 self.bz2obj = bz2.BZ2Compressor()
640
641 def read(self, size):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000642 x = len(self.buf)
643 while x < size:
644 try:
645 raw = self.fileobj.read(self.blocksize)
646 data = self.bz2obj.decompress(raw)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000647 self.buf += data
Thomas Wouters477c8d52006-05-27 19:21:47 +0000648 except EOFError:
649 break
650 x += len(data)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000651
652 buf = self.buf[:size]
653 self.buf = self.buf[size:]
654 self.pos += len(buf)
655 return buf
656
657 def seek(self, pos):
658 if pos < self.pos:
659 self.init()
660 self.read(pos - self.pos)
661
662 def tell(self):
663 return self.pos
664
665 def write(self, data):
666 self.pos += len(data)
667 raw = self.bz2obj.compress(data)
668 self.fileobj.write(raw)
669
670 def close(self):
671 if self.mode == "w":
672 raw = self.bz2obj.flush()
673 self.fileobj.write(raw)
674 self.fileobj.close()
675# class _BZ2Proxy
676
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000677#------------------------
678# Extraction file object
679#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000680class _FileInFile(object):
681 """A thin wrapper around an existing file object that
682 provides a part of its data as an individual file
683 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000684 """
685
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000686 def __init__(self, fileobj, offset, size, sparse=None):
687 self.fileobj = fileobj
688 self.offset = offset
689 self.size = size
690 self.sparse = sparse
691 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000692
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000693 def seekable(self):
694 if not hasattr(self.fileobj, "seekable"):
695 # XXX gzip.GzipFile and bz2.BZ2File
696 return True
697 return self.fileobj.seekable()
698
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000699 def tell(self):
700 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000701 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000702 return self.position
703
704 def seek(self, position):
705 """Seek to a position in the file.
706 """
707 self.position = position
708
709 def read(self, size=None):
710 """Read data from the file.
711 """
712 if size is None:
713 size = self.size - self.position
714 else:
715 size = min(size, self.size - self.position)
716
717 if self.sparse is None:
718 return self.readnormal(size)
719 else:
720 return self.readsparse(size)
721
722 def readnormal(self, size):
723 """Read operation for regular files.
724 """
725 self.fileobj.seek(self.offset + self.position)
726 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000727 return self.fileobj.read(size)
728
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000729 def readsparse(self, size):
730 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000731 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000732 data = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000733 while size > 0:
734 buf = self.readsparsesection(size)
735 if not buf:
736 break
737 size -= len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000738 data += buf
739 return data
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000740
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000741 def readsparsesection(self, size):
742 """Read a single section of a sparse file.
743 """
744 section = self.sparse.find(self.position)
745
746 if section is None:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000747 return b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000748
749 size = min(size, section.offset + section.size - self.position)
750
751 if isinstance(section, _data):
752 realpos = section.realpos + self.position - section.offset
753 self.fileobj.seek(self.offset + realpos)
754 self.position += size
755 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000756 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000757 self.position += size
758 return NUL * size
759#class _FileInFile
760
761
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       Raw data is pulled from a _FileInFile wrapper; readline() reads
       ahead in chunks of `blocksize' bytes and keeps the surplus in
       self.buffer, which read() and readline() drain first.
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by tarinfo inside
           the archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0       # logical read position within the member
        self.buffer = b""       # read-ahead data not yet handed out

    def readable(self):
        """Return True, the object can always be read from.
        """
        return True

    def writable(self):
        """Return False, the object is read-only.
        """
        return False

    def seekable(self):
        """Return whether the underlying file object is seekable.
        """
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "everything", as for other file objects.
        # Slicing the buffer with it would silently drop and reorder data.
        if size is not None and size < 0:
            size = None

        if size is None:
            buf = self.buffer + self.fileobj.read()
            self.buffer = b""
        else:
            # Serve buffered read-ahead data first, then top up from
            # the underlying file object.
            buf = self.buffer[:size]
            self.buffer = self.buffer[size:]
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a bytes object with at most that
           size, which may be an incomplete line. None or a negative
           size means no limit.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # No newline buffered yet: read ahead block by block until
            # one shows up or the data is exhausted.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # EOF without newline, hand out what is left.
                        pos = len(self.buffer)
                    break

        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file and return the new position.
           The position is clamped to the range 0..self.size. Raise
           ValueError for an unknown whence value.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered read-ahead data belongs to the old position.
        self.buffer = b""
        self.fileobj.seek(self.position)
        return self.position

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
899
900#------------------
901# Exported Classes
902#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary. The name
           and a non-empty linkname are passed through normpath(), the
           mode is masked with 0o7777 and directory names get a
           trailing slash.
        """
        info = {
            "name":     normpath(self.name),
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": normpath(self.linkname) if self.linkname else "",
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a bytes object of 512 byte blocks.
           format must be one of USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT,
           otherwise ValueError is raised.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block. Raise ValueError
           if the linkname or the name does not fit.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence. Overlong
           link names and names are stored in preceding longlink and
           longname members.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding, errors)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, encoding, errors)

    @classmethod
    def create_pax_global_header(cls, pax_headers, encoding, errors):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, encoding, errors)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part. The split happens at a "/"; raise
           ValueError if no suitable split point exists.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field placeholder. It has to be 8 bytes wide so
            # that the fields after it line up with the offsets used in
            # frombuf() (the checksum is read from buf[148:156]).
            b" " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Splice the checksum into the placeholder. The text must be
        # encoded explicitly, bytes() does not accept a bare str.
        buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding, errors):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        records = []
        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the value is stable.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append(str(p).encode("ascii") + b" " + keyword + b"=" + value + b"\n")
        records = b"".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.
           Raise HeaderError if the block is truncated, empty or has a
           bad checksum.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        obj = cls()
        obj.buf = buf       # kept for _proc_sparse()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile. Return None if no more data can be read.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        if not buf:
            return
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        next = self.fromtarfile(tarfile)
        if next is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        lastpos = 0
        realpos = 0
        # There are 4 possible sparse structs in the
        # first header.
        for i in range(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        # Indexing a bytes object yields an int, non-zero means set.
        isextended = bool(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = bool(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # The header's size field holds the stored size; it is used to
        # find the next header before being replaced by the original size.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001. Raise HeaderError for a record with a zero
           length field or if an extended header is not followed by
           another header.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        # buf is a bytes object, so the pattern has to be bytes as well.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would never advance pos and loop forever.
                raise HeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        next = self.fromtarfile(tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            if next is None:
                raise HeaderError("missing subsequent header")

            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0

            # "path" and "linkpath" end up in name and linkname via
            # the properties defined above.
            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1455
1456class TarFile(object):
1457 """The TarFile Class provides an interface to tar archives.
1458 """
1459
1460 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1461
1462 dereference = False # If true, add content of linked file to the
1463 # tar file, else the link.
1464
1465 ignore_zeros = False # If true, skips empty or invalid blocks and
1466 # continues processing.
1467
1468 errorlevel = 0 # If 0, fatal errors only appear in debug
1469 # messages (if debug >= 0). If > 0, errors
1470 # are passed to the caller as exceptions.
1471
Guido van Rossumd8faa362007-04-27 19:54:29 +00001472 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001473
Guido van Rossume7ba4952007-06-06 23:52:48 +00001474 encoding = ENCODING # Encoding for 8-bit character strings.
1475
1476 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001477
Guido van Rossumd8faa362007-04-27 19:54:29 +00001478 tarinfo = TarInfo # The default TarInfo class to use.
1479
1480 fileobject = ExFileObject # The default ExFileObject class to use.
1481
1482 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1483 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Guido van Rossume7ba4952007-06-06 23:52:48 +00001484 errors=None, pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001485 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1486 read from an existing archive, 'a' to append data to an existing
1487 file or 'w' to create a new file overwriting an existing one. `mode'
1488 defaults to 'r'.
1489 If `fileobj' is given, it is used for reading or writing data. If it
1490 can be determined, `mode' is overridden by `fileobj's mode.
1491 `fileobj' is not closed, when TarFile is closed.
1492 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001494 raise ValueError("mode must be 'r', 'a' or 'w'")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001495 self.mode = mode
1496 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497
1498 if not fileobj:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001499 if self.mode == "a" and not os.path.exists(name):
Thomas Wouterscf297e42007-02-23 15:07:44 +00001500 # Create nonexistent files in append mode.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001501 self.mode = "w"
1502 self._mode = "wb"
Guido van Rossume7ba4952007-06-06 23:52:48 +00001503 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001504 self._extfileobj = False
1505 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001506 if name is None and hasattr(fileobj, "name"):
1507 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001508 if hasattr(fileobj, "mode"):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001509 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001510 self._extfileobj = True
Guido van Rossumd8faa362007-04-27 19:54:29 +00001511 self.name = os.path.abspath(name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001512 self.fileobj = fileobj
1513
Guido van Rossumd8faa362007-04-27 19:54:29 +00001514 # Init attributes.
1515 if format is not None:
1516 self.format = format
1517 if tarinfo is not None:
1518 self.tarinfo = tarinfo
1519 if dereference is not None:
1520 self.dereference = dereference
1521 if ignore_zeros is not None:
1522 self.ignore_zeros = ignore_zeros
1523 if encoding is not None:
1524 self.encoding = encoding
Guido van Rossume7ba4952007-06-06 23:52:48 +00001525
1526 if errors is not None:
1527 self.errors = errors
1528 elif mode == "r":
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001529 self.errors = "replace"
Guido van Rossume7ba4952007-06-06 23:52:48 +00001530 else:
1531 self.errors = "strict"
1532
1533 if pax_headers is not None and self.format == PAX_FORMAT:
1534 self.pax_headers = pax_headers
1535 else:
1536 self.pax_headers = {}
1537
Guido van Rossumd8faa362007-04-27 19:54:29 +00001538 if debug is not None:
1539 self.debug = debug
1540 if errorlevel is not None:
1541 self.errorlevel = errorlevel
1542
1543 # Init datastructures.
Thomas Wouters477c8d52006-05-27 19:21:47 +00001544 self.closed = False
1545 self.members = [] # list of members as TarInfo objects
1546 self._loaded = False # flag if all members have been read
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001547 self.offset = 0 # current position in the archive file
Thomas Wouters477c8d52006-05-27 19:21:47 +00001548 self.inodes = {} # dictionary caching the inodes of
1549 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001550
Guido van Rossumd8faa362007-04-27 19:54:29 +00001551 if self.mode == "r":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001552 self.firstmember = None
1553 self.firstmember = self.next()
1554
Guido van Rossumd8faa362007-04-27 19:54:29 +00001555 if self.mode == "a":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001556 # Move to the end of the archive,
1557 # before the first empty block.
1558 self.firstmember = None
1559 while True:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001560 if self.next() is None:
Thomas Wouterscf297e42007-02-23 15:07:44 +00001561 if self.offset > 0:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001562 self.fileobj.seek(self.fileobj.tell() - BLOCKSIZE)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001563 break
1564
Guido van Rossumd8faa362007-04-27 19:54:29 +00001565 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001566 self._loaded = True
1567
Guido van Rossume7ba4952007-06-06 23:52:48 +00001568 if self.pax_headers:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001569 buf = self.tarinfo.create_pax_global_header(
1570 self.pax_headers.copy(), self.encoding, self.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001571 self.fileobj.write(buf)
1572 self.offset += len(buf)
1573
def _getposix(self):
    """Return True when the archive format is USTAR_FORMAT."""
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Deprecated setter backing the `posix' property.

    A true `value' selects USTAR_FORMAT, a false one GNU_FORMAT. A
    DeprecationWarning is always issued; use the `format' attribute
    instead.
    """
    import warnings
    # stacklevel=2 attributes the warning to the code that assigns
    # `posix', not to this setter, so the user can locate the call site.
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  stacklevel=2)
    if value:
        self.format = USTAR_FORMAT
    else:
        self.format = GNU_FORMAT

# Boolean view of the `format' attribute, kept for backward compatibility.
posix = property(_getposix, _setposix)
1584
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001585 #--------------------------------------------------------------------------
1586 # Below are the classmethods which act as alternate constructors to the
1587 # TarFile class. The open() method is the only one that is needed for
1588 # public use; it is the "super"-constructor and is able to select an
1589 # adequate "sub"-constructor for a particular compression using the mapping
1590 # from OPEN_METH.
1591 #
1592 # This concept allows one to subclass TarFile without losing the comfort of
1593 # the super-constructor. A sub-constructor is registered and made available
1594 # by adding it to the mapping in OPEN_METH.
1595
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type, and ReadError if no registered opener accepts
       the file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file by
        # trying every registered opener in turn.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                # Remember the position so a failed attempt can be undone.
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against a tuple: a substring test against "rw" would
        # also accept the invalid mode "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        # The stream wrapper was created here, so the TarFile owns it.
        t._extfileobj = False
        return t

    # Tuple membership, not substring: "" and "aw" are not valid modes and
    # must fall through to the ValueError below.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001668
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else raises
       ValueError. All arguments are passed on to the class constructor.
    """
    # Tuple membership rejects the empty string, which a substring test
    # against "raw" would silently accept.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001676
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is unavailable and ReadError
       if opening the archive fails with an IOError.
    """
    # Tuple membership rejects the empty string, which a substring test
    # against "rw" would silently accept.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Track whether the raw file was opened here, so that it can be
    # released again if opening the archive fails.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except Exception:
        # Any other failure propagates unchanged, but must not leak the
        # file handle opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1702
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is unavailable and ReadError
       if opening the archive fails with an IOError.
    """
    # Tuple membership rejects the empty string, which a substring test
    # against "rw" would silently accept.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here from `name' is closed on failure; a
    # caller-supplied file object is merely wrapped and left alone.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except Exception:
        # Propagate unchanged, but do not leak the file opened above.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1727
# Registry of the *open() alternate constructors: each key is a compression
# type and each value is the name of the classmethod handling it.
OPEN_METH = {
    "tar": "taropen",   # plain tar, no compression
    "gz": "gzopen",     # tar compressed with gzip
    "bz2": "bz2open",   # tar compressed with bzip2
}
1734
1735 #--------------------------------------------------------------------------
1736 # The public methods which TarFile provides:
1737
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up
       to a multiple of RECORDSIZE.

       Calling close() on an already closed TarFile does nothing. A file
       object that was not supplied by the caller is closed even if
       writing the trailer fails.
    """
    if self.closed:
        return

    # Mark the archive closed up front: if writing the trailer fails, a
    # later close() must not try to write to the released file again.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the underlying file only when this object owns it.
        if not self._extfileobj:
            self.fileobj.close()
1757
def getmember(self, name):
    """Return the TarInfo object for the archive member `name'.

       The lookup is delegated to self._getmember(); a KeyError is
       raised when it finds nothing. If a member occurs more than once
       in the archive, its last occurrence is assumed to be the most
       up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001768
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in the
       order in which they appear in the archive.
    """
    self._check()
    # A complete listing needs every header, so make sure the whole
    # archive has been scanned before handing out the list.
    if not self._loaded:
        self._load()
    return self.members
1778
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001784
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing a file on disk.

       The file is given either by its path `name' or by an open file
       object `fileobj' (examined with os.fstat on its file descriptor;
       its `name' attribute then replaces `name'). `arcname', if given,
       is the name the member gets inside the archive. The returned
       object may be modified before it is passed to addfile(). None is
       returned for file types that are not supported.
    """
    self._check("aw")

    # An open file object takes precedence over `name'.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalise the path, drop a drive prefix
    # and strip leading slashes so that it becomes relative.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    _drive, arcname = os.path.splitdrive(arcname)
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    # Create the object that is going to be filled in below.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Stat the file; symlinks are only followed when dereferencing is
    # requested or the platform has no lstat().
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)
    linkname = ""

    st_mode = st.st_mode
    if stat.S_ISREG(st_mode):
        inode = (st.st_ino, st.st_dev)
        already_archived = (not self.dereference and st.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # A hardlink to a file stored earlier under another name.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            ftype = REGTYPE
            # Remember the inode only if it is valid
            # (for win32 it is always 0).
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(st_mode):
        ftype = DIRTYPE
    elif stat.S_ISFIFO(st_mode):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        ftype = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        ftype = BLKTYPE
    else:
        return None

    # Copy everything the stat result offers into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = st_mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if stat.S_ISREG(st_mode) else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname

    # Symbolic owner and group names are optional: ids without a
    # database entry simply keep their default names.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(st.st_rdev)
            tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1883
def list(self, verbose=True):
    """Print a table of contents to sys.stdout.

       With `verbose' set to False only the member names are printed;
       with True an `ls -l'-like line is produced for every member.
    """
    self._check()

    for member in self:
        if verbose:
            print(filemode(member.mode), end=' ')
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            print(owner, end=' ')
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            stamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        # Directories are marked with a trailing slash.
        print(member.name + ("/" if member.isdir() else ""), end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001912
def add(self, name, arcname=None, recursive=True, exclude=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None and exclude(name):
        self._dbg(2, "tarfile: Excluded %r" % name)
        return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory. Only its entries are added, never "." itself.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f), recursive, exclude)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the handle even when writing to the archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude)

    else:
        self.addfile(tarinfo)
1969
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

       When `fileobj' is given, tarinfo.size bytes are read from it and
       written after the header. TarInfo objects can be created with
       gettarinfo(). On Windows platforms, `fileobj' should always be
       opened with mode 'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad it with NULs up to
    # the next block boundary.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001995
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), 0o700)
            except EnvironmentError:
                # Best effort: the directory may already exist.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories, so that nested directories are
    # finalised before their parents.
    directories.sort(key=lambda t: t.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate name: reassigning `path' here would make every
        # following directory resolve relative to the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2036
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.

       Errors are reported according to self.errorlevel: an
       EnvironmentError is re-raised when errorlevel > 0 and an
       ExtractError when errorlevel > 1; otherwise they are only passed
       to self._dbg().
    """
    self._check("r")

    # `str' is the text type on Python 3, where `basestring' is undefined.
    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        else:
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2069
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()

       Raises StreamError when a (sym)link is requested from an archive
       that is read through a _Stream.
    """
    self._check("r")

    # `str' is the text type on Python 3, where `basestring' is undefined.
    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    if tarinfo.isreg():
        return self.fileobject(self, tarinfo)

    elif tarinfo.type not in SUPPORTED_TYPES:
        # If a member's type is unknown, it is treated as a
        # regular file.
        return self.fileobject(self, tarinfo)

    elif tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        else:
            # A (sym)link's file object is its target's file object.
            return self.extractfile(self._getmember(tarinfo.linkname,
                                                    tarinfo))
    else:
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None
2108
def _extract_member(self, tarinfo, targetpath):
    """Write the member described by `tarinfo' to the filesystem location
       `targetpath', creating missing parent directories, and then apply
       owner, permissions and modification time.
    """
    # Drop a single trailing slash and convert the path to the
    # platform's native form.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Make sure every directory above the target exists.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    # Report what is being extracted; links also show their target.
    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Hand over to the make*() method matching the member's type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    # Mode and mtime are not applied to symbolic links.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2149
2150 #--------------------------------------------------------------------------
2151 # Below are the different file methods. They are called via
2152 # _extract_member() when extract() is called. They can be replaced in a
2153 # subclass to implement other functionality.
2154
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that already
       exists is not treated as an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError as exc:
        # Only "already exists" is tolerated; everything else propagates.
        if exc.errno == errno.EEXIST:
            return
        raise
2163
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through self.extractfile() and
       copied to `targetpath'. Both file objects are closed even if
       opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2172
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' by treating it
       as a regular file, and report that through self._dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file." \
           % tarinfo.type
    self._dbg(1, note)
2180
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. Raises ExtractError on platforms
       whose os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002188
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'. Raises
       ExtractError when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's permission bits with the device type flag.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2203
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at `targetpath'.

       When that raises AttributeError (e.g. the platform's os module
       has no symlink()/link()), fall back to extracting the referenced
       member from the archive, and finally to copying the referenced
       file from disk. IOError is raised if all of that fails.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # A symlink target is relative to the directory of the link.
        if tarinfo.issym():
            link_dir = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(link_dir, linkpath))

        try:
            referenced = self.getmember(linkpath)
            self._extract_member(referenced, targetpath)
        except (EnvironmentError, KeyError):
            # Not available from the archive: try the file on disk.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002230
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo.

    Does nothing unless the pwd module is available and the effective
    user id is 0. Group and user are each resolved by name first, then
    by numeric id, and finally fall back to the id of the current
    process. Raises ExtractError if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself rather than what it points to.
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002258
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to targetpath.

    Does nothing when the os module has no chmod(). Raises
    ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002267
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

    Does nothing when the os module has no utime(), or for directories
    on win32. Raises ExtractError if the times cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    if sys.platform == "win32" and tarinfo.isdir():
        # According to msdn.microsoft.com, it is an error (EACCES)
        # to use utime() on directories.
        return
    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002281
2282 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    Raises ReadError when the very first header (offset 0) is invalid
    and ignore_zeros is not set.
    """
    self._check("ra")

    # A stored first member is handed out exactly once.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except HeaderError as e:
            if not self.ignore_zeros:
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
            # Skip the bad block and try the one that follows it.
            self._dbg(2, "0x%X: %s" % (self.offset, e))
            self.offset += BLOCKSIZE
            continue

        if tarinfo is None:
            return None
        self.members.append(tarinfo)
        return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002315
2316 #--------------------------------------------------------------------------
2317 # Little helper methods:
2318
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002319 def _getmember(self, name, tarinfo=None):
2320 """Find an archive member by name from bottom to top.
2321 If tarinfo is given, it is used as the starting point.
2322 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002323 # Ensure that all members have been loaded.
2324 members = self.getmembers()
2325
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002326 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002327 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002328 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002329 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002330
Guido van Rossum805365e2007-05-07 22:24:25 +00002331 for i in range(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002332 if name == members[i].name:
2333 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002334
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002335 def _load(self):
2336 """Read through the entire archive file and look for readable
2337 members.
2338 """
2339 while True:
2340 tarinfo = self.next()
2341 if tarinfo is None:
2342 break
2343 self._loaded = True
2344
2345 def _check(self, mode=None):
2346 """Check if TarFile is still open, and if the operation's mode
2347 corresponds to TarFile's mode.
2348 """
2349 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +00002350 raise IOError("%s is closed" % self.__class__.__name__)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002351 if mode is not None and self.mode not in mode:
2352 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002353
2354 def __iter__(self):
2355 """Provide an iterator object.
2356 """
2357 if self._loaded:
2358 return iter(self.members)
2359 else:
2360 return TarIter(self)
2361
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002362 def _dbg(self, level, msg):
2363 """Write debugging output to sys.stderr.
2364 """
2365 if level <= self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002366 print(msg, file=sys.stderr)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002367# class TarFile
2368
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator positioned at the first member of tarfile.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def __next__(self):
        """Return the next member, raising StopIteration at the end.

        Members come from TarFile.next() until the archive is fully
        read, at which point the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.  Once the
        # archive is loaded, continue from the member list by index.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2404
2405# Helper classes for sparse file support
2406class _section:
2407 """Base class for _data and _hole.
2408 """
2409 def __init__(self, offset, size):
2410 self.offset = offset
2411 self.size = size
2412 def __contains__(self, offset):
2413 return self.offset <= offset < self.offset + self.size
2414
class _data(_section):
    """A data section of a sparse file; realpos is stored alongside
       the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2421
class _hole(_section):
    """A hole section of a sparse file; adds nothing to _section.
    """
2426
2427class _ringbuffer(list):
2428 """Ringbuffer class which increases performance
2429 over a regular list.
2430 """
2431 def __init__(self):
2432 self.idx = 0
2433 def find(self, offset):
2434 idx = self.idx
2435 while True:
2436 item = self[idx]
2437 if offset in item:
2438 break
2439 idx += 1
2440 if idx == len(self):
2441 idx = 0
2442 if idx == self.idx:
2443 # End of File
2444 return None
2445 self.idx = idx
2446 return item
2447
2448#---------------------------------------------
2449# zipfile compatible TarFile class
2450#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzip-compressed
        (TAR_GZIPPED) tar archive.

        Raises ValueError for any other compression constant. When
        opened for reading, every member additionally gets the
        filename, file_size and date_time attributes.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return a list with the names of all members in infolist().
        """
        # A real list, not a lazy map object, so that the result can be
        # indexed, measured and iterated more than once.
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return a list of the members whose type is in REGULAR_TYPES.
        """
        # A real list rather than a one-shot filter object.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """List the archive contents via TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
        compress_type is accepted but ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the binary data bytes to the archive as member zinfo.

        zinfo must provide filename and date_time; name, size,
        file_size and mtime are set on it before it is handed to
        TarFile.addfile().
        """
        from io import BytesIO
        import calendar
        zinfo.name = zinfo.filename
        # The size comes from the data itself, so a stale or unset
        # file_size cannot truncate what gets written.
        zinfo.file_size = zinfo.size = len(bytes)
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        # Archive data is binary: io.StringIO rejects bytes.
        self.tarfile.addfile(zinfo, BytesIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2494#class TarFileCompat
2495
2496#--------------------
2497# exported functions
2498#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) as a tar
       archive, False if that raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2509
# Keep a reference to whatever open() is bound to at this point
# (presumably the builtin -- confirm nothing rebinds it earlier in the
# module) before the name is reused on the next line.
bltn_open = open
# From here on the module-level name open refers to TarFile.open, so
# module functions that call open() at run time get the tar opener.
open = TarFile.open