blob: a888d692dea230c8aab82671c36c7240fdef8d88 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.  The "$...$" values look like version-control keyword
# placeholders (presumably expanded on checkout -- TODO confirm).
__version__ = "$Revision$"

version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__ = "$Date$"
__cvsid__ = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000039
40#---------
41# Imports
42#---------
43import sys
44import os
45import shutil
46import stat
47import errno
48import time
49import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000050import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000051import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000052
53try:
54 import grp, pwd
55except ImportError:
56 grp = pwd = None
57
58# from tarfile import *
59__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
60
Georg Brandl1a3284e2007-12-02 09:40:06 +000061from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000062
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000063#---------------------------------------------------------
64# tar constants
65#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings span the header's magic and version fields, so each
# of them has to be exactly 8 bytes long.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string ("ustar", 2 spaces, NUL)
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string ("ustar", NUL, "00")

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Type flags stored in a member's header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive formats; DEFAULT_FORMAT is the default for itn()'s format argument.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000098
99#---------------------------------------------------------
100# tarfile constants
101#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to the callable
# (float or int) associated with that field.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
131
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000132#---------------------------------------------------------
133# Bits used in the mode field, values in octal.
134#---------------------------------------------------------
# File type bits (matched against a mode by filemode() via filemode_table).
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

# Special bits; filemode() renders them as "s"/"S" and "t"/"T".
TSUID = 0o4000            # set UID on execution
TSGID = 0o2000            # set GID on execution
TSVTX = 0o1000            # reserved

# Permission bits for owner, group and other.
TUREAD = 0o400            # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC = 0o100            # execute/search by owner
TGREAD = 0o040            # read by group
TGWRITE = 0o020           # write by group
TGEXEC = 0o010            # execute/search by group
TOREAD = 0o004            # read by other
TOWRITE = 0o002           # write by other
TOEXEC = 0o001            # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155
156#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000157# initialization
158#---------------------------------------------------------
# Module-wide default encoding, taken from the filesystem encoding.
# The None check guards against the interpreter not reporting one,
# in which case plain ASCII is used.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = "ascii"
Guido van Rossumd8faa362007-04-27 19:54:29 +0000162
163#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164# Some useful functions
165#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000166
def stn(s, length, encoding, errors):
    """Encode string s into a bytes field of exactly length bytes.

    The encoded text is cut off after length bytes; a shorter value is
    filled up with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long values get no padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000172
def nts(s, encoding, errors):
    """Decode a null-terminated bytes object to a string.

    Everything from the first NUL byte onwards is discarded; without a
    NUL byte the whole value is decoded.
    """
    text, _, _ = s.partition(b"\0")
    return text.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000180
def nti(s):
    """Convert a number field (a bytes object) to a python number.

    Raises InvalidHeaderError if the field is neither valid octal text
    nor base-256 encoded.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    # Indexing a bytes object yields an int, so the marker byte has to be
    # compared against the integer 0o200 (comparing it with chr(0o200)
    # is never equal and would hide the base-256 branch).
    if s[0] != 0o200:
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        # GNU extension: the bytes after the marker are a big-endian number.
        n = 0
        for byte in s[1:]:
            n = (n << 8) + byte
    return n
197
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

    Raises ValueError if the number does not fit into the field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Emit the low-order bytes first, pushing each one to the front so the
    # field ends up big-endian, then put the marker byte before them.
    field = bytearray()
    for _ in range(width):
        field.insert(0, n & 0o377)
        n >>= 8
    field.insert(0, 0o200)
    return field
224
def calc_chksums(buf):
    """Return the (unsigned, signed) checksum pair of a member's header.

    All bytes of the 512 byte header are summed up except for the
    chksum field (bytes 148 to 155), which is counted as if it was
    filled with spaces.  According to the GNU tar sources, some tars
    (Sun and NeXT) calculate chksum with signed char, which gives a
    different result if bytes with the high bit set are present, hence
    both variants are computed.
    """
    # "8x" skips the chksum field; the constant 256 below accounts for
    # the eight spaces (8 * 32) it is assumed to contain.
    as_unsigned = struct.unpack_from("148B8x356B", buf)
    as_signed = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000237
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length set to None the entire remaining content of src is
    copied.  Raises IOError if src ends before length bytes were read.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(count):
        # Move exactly count bytes; a short read means src is truncated.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)
    for _ in range(full_chunks):
        transfer(chunk_size)
    if tail != 0:
        transfer(tail)
    return
262
# Lookup table used by filemode().  Each inner tuple describes one
# character position of the resulting string; its (bitmask, character)
# pairs are tried in order and the first mask that is fully set in the
# mode determines the character.
filemode_table = (
    # position 0: file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # positions 1-3: owner permissions (set-UID shares the execute slot)
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # positions 4-6: group permissions (set-GID shares the execute slot)
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # positions 7-9: permissions for others (TSVTX shares the execute slot)
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000289
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # The first entry whose bits are all set in mode decides the
        # character for this position; "-" means none of them matched.
        matching = (char for bit, char in candidates if mode & bit == bit)
        chars.append(next(matching, "-"))
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000304
class TarError(Exception):
    """Base class of all exceptions defined by this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000338
339#---------------------------
340# internal stream interface
341#---------------------------
342class _LowLevelFile:
343 """Low-level file object. Supports reading and writing.
344 It is used instead of a regular file object for streaming
345 access.
346 """
347
348 def __init__(self, name, mode):
349 mode = {
350 "r": os.O_RDONLY,
351 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
352 }[mode]
353 if hasattr(os, "O_BINARY"):
354 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000355 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000356
357 def close(self):
358 os.close(self.fd)
359
360 def read(self, size):
361 return os.read(self.fd, size)
362
363 def write(self, s):
364 os.write(self.fd, s)
365
366class _Stream:
367 """Class that serves as an adapter between TarFile and
368 a stream-like object. The stream-like object only
369 needs to have a read() or write() method and is accessed
370 blockwise. Use of gzip or bzip2 compression is possible.
371 A stream-like object could be for example: sys.stdin,
372 sys.stdout, a socket, a tape device etc.
373
374 _Stream is intended to be used only internally.
375 """
376
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000377 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000378 """Construct a _Stream object.
379 """
380 self._extfileobj = True
381 if fileobj is None:
382 fileobj = _LowLevelFile(name, mode)
383 self._extfileobj = False
384
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000385 if comptype == '*':
386 # Enable transparent compression detection for the
387 # stream interface
388 fileobj = _StreamProxy(fileobj)
389 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000390
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000391 self.name = name or ""
392 self.mode = mode
393 self.comptype = comptype
394 self.fileobj = fileobj
395 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000396 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000397 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000398 self.closed = False
399
400 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000401 try:
402 import zlib
403 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000404 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000405 self.zlib = zlib
Antoine Pitrou77b338b2009-12-14 18:00:06 +0000406 self.crc = zlib.crc32(b"")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000407 if mode == "r":
408 self._init_read_gz()
409 else:
410 self._init_write_gz()
411
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000412 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000413 try:
414 import bz2
415 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000416 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000417 if mode == "r":
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000418 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000419 self.cmp = bz2.BZ2Decompressor()
420 else:
421 self.cmp = bz2.BZ2Compressor()
422
423 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000424 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000425 self.close()
426
427 def _init_write_gz(self):
428 """Initialize for writing with gzip compression.
429 """
430 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
431 -self.zlib.MAX_WBITS,
432 self.zlib.DEF_MEM_LEVEL,
433 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000434 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000435 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000436 if self.name.endswith(".gz"):
437 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000438 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
439 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000440
441 def write(self, s):
442 """Write string s to the stream.
443 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000444 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000445 self.crc = self.zlib.crc32(s, self.crc)
446 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000447 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000448 s = self.cmp.compress(s)
449 self.__write(s)
450
451 def __write(self, s):
452 """Write string s to the stream if a whole new block
453 is ready to be written.
454 """
455 self.buf += s
456 while len(self.buf) > self.bufsize:
457 self.fileobj.write(self.buf[:self.bufsize])
458 self.buf = self.buf[self.bufsize:]
459
460 def close(self):
461 """Close the _Stream object. No operation should be
462 done on it afterwards.
463 """
464 if self.closed:
465 return
466
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000467 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000468 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000469
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000470 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000471 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000472 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000473 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000474 # The native zlib crc is an unsigned 32-bit integer, but
475 # the Python wrapper implicitly casts that to a signed C
476 # long. So, on a 32-bit box self.crc may "look negative",
477 # while the same crc on a 64-bit box may "look positive".
478 # To avoid irksome warnings from the `struct` module, force
479 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000480 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
481 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000482
483 if not self._extfileobj:
484 self.fileobj.close()
485
486 self.closed = True
487
488 def _init_read_gz(self):
489 """Initialize for reading a gzip compressed fileobj.
490 """
491 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000492 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000493
494 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000495 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000496 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000497 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000498 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000499
500 flag = ord(self.__read(1))
501 self.__read(6)
502
503 if flag & 4:
504 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
505 self.read(xlen)
506 if flag & 8:
507 while True:
508 s = self.__read(1)
509 if not s or s == NUL:
510 break
511 if flag & 16:
512 while True:
513 s = self.__read(1)
514 if not s or s == NUL:
515 break
516 if flag & 2:
517 self.__read(2)
518
519 def tell(self):
520 """Return the stream's file pointer position.
521 """
522 return self.pos
523
524 def seek(self, pos=0):
525 """Set the stream's file pointer to pos. Negative seeking
526 is forbidden.
527 """
528 if pos - self.pos >= 0:
529 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000530 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000531 self.read(self.bufsize)
532 self.read(remainder)
533 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000534 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000535 return self.pos
536
537 def read(self, size=None):
538 """Return the next size number of bytes from the stream.
539 If size is not defined, return all bytes of the stream
540 up to EOF.
541 """
542 if size is None:
543 t = []
544 while True:
545 buf = self._read(self.bufsize)
546 if not buf:
547 break
548 t.append(buf)
549 buf = "".join(t)
550 else:
551 buf = self._read(size)
552 self.pos += len(buf)
553 return buf
554
555 def _read(self, size):
556 """Return size bytes from the stream.
557 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000558 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000559 return self.__read(size)
560
561 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000562 while c < size:
563 buf = self.__read(self.bufsize)
564 if not buf:
565 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000566 try:
567 buf = self.cmp.decompress(buf)
568 except IOError:
569 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000570 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000571 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000572 buf = self.dbuf[:size]
573 self.dbuf = self.dbuf[size:]
574 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000575
576 def __read(self, size):
577 """Return size bytes from stream. If internal buffer is empty,
578 read another block from the stream.
579 """
580 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000581 while c < size:
582 buf = self.fileobj.read(self.bufsize)
583 if not buf:
584 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000585 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000586 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000587 buf = self.buf[:size]
588 self.buf = self.buf[size:]
589 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000590# class _Stream
591
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000592class _StreamProxy(object):
593 """Small proxy class that enables transparent compression
594 detection for the Stream interface (mode 'r|*').
595 """
596
597 def __init__(self, fileobj):
598 self.fileobj = fileobj
599 self.buf = self.fileobj.read(BLOCKSIZE)
600
601 def read(self, size):
602 self.read = self.fileobj.read
603 return self.buf
604
605 def getcomptype(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000606 if self.buf.startswith(b"\037\213\010"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000607 return "gz"
Lars Gustäbela280ca72007-08-28 07:34:33 +0000608 if self.buf.startswith(b"BZh91"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000609 return "bz2"
610 return "tar"
611
612 def close(self):
613 self.fileobj.close()
614# class StreamProxy
615
Thomas Wouters477c8d52006-05-27 19:21:47 +0000616class _BZ2Proxy(object):
617 """Small proxy class that enables external file object
618 support for "r:bz2" and "w:bz2" modes. This is actually
619 a workaround for a limitation in bz2 module's BZ2File
620 class which (unlike gzip.GzipFile) has no support for
621 a file object argument.
622 """
623
624 blocksize = 16 * 1024
625
626 def __init__(self, fileobj, mode):
627 self.fileobj = fileobj
628 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000629 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000630 self.init()
631
632 def init(self):
633 import bz2
634 self.pos = 0
635 if self.mode == "r":
636 self.bz2obj = bz2.BZ2Decompressor()
637 self.fileobj.seek(0)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000638 self.buf = b""
Thomas Wouters477c8d52006-05-27 19:21:47 +0000639 else:
640 self.bz2obj = bz2.BZ2Compressor()
641
642 def read(self, size):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000643 x = len(self.buf)
644 while x < size:
Lars Gustäbel42e00912009-03-22 20:34:29 +0000645 raw = self.fileobj.read(self.blocksize)
646 if not raw:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000647 break
Lars Gustäbel42e00912009-03-22 20:34:29 +0000648 data = self.bz2obj.decompress(raw)
649 self.buf += data
Thomas Wouters477c8d52006-05-27 19:21:47 +0000650 x += len(data)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000651
652 buf = self.buf[:size]
653 self.buf = self.buf[size:]
654 self.pos += len(buf)
655 return buf
656
657 def seek(self, pos):
658 if pos < self.pos:
659 self.init()
660 self.read(pos - self.pos)
661
662 def tell(self):
663 return self.pos
664
665 def write(self, data):
666 self.pos += len(data)
667 raw = self.bz2obj.compress(data)
668 self.fileobj.write(raw)
669
670 def close(self):
671 if self.mode == "w":
672 raw = self.bz2obj.flush()
673 self.fileobj.write(raw)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000674# class _BZ2Proxy
675
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000676#------------------------
677# Extraction file object
678#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000679class _FileInFile(object):
680 """A thin wrapper around an existing file object that
681 provides a part of its data as an individual file
682 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000683 """
684
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000685 def __init__(self, fileobj, offset, size, sparse=None):
686 self.fileobj = fileobj
687 self.offset = offset
688 self.size = size
689 self.sparse = sparse
690 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000691
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000692 def seekable(self):
693 if not hasattr(self.fileobj, "seekable"):
694 # XXX gzip.GzipFile and bz2.BZ2File
695 return True
696 return self.fileobj.seekable()
697
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000698 def tell(self):
699 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000700 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000701 return self.position
702
703 def seek(self, position):
704 """Seek to a position in the file.
705 """
706 self.position = position
707
708 def read(self, size=None):
709 """Read data from the file.
710 """
711 if size is None:
712 size = self.size - self.position
713 else:
714 size = min(size, self.size - self.position)
715
716 if self.sparse is None:
717 return self.readnormal(size)
718 else:
719 return self.readsparse(size)
720
721 def readnormal(self, size):
722 """Read operation for regular files.
723 """
724 self.fileobj.seek(self.offset + self.position)
725 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000726 return self.fileobj.read(size)
727
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000728 def readsparse(self, size):
729 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000730 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000731 data = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000732 while size > 0:
733 buf = self.readsparsesection(size)
734 if not buf:
735 break
736 size -= len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000737 data += buf
738 return data
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000739
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000740 def readsparsesection(self, size):
741 """Read a single section of a sparse file.
742 """
743 section = self.sparse.find(self.position)
744
745 if section is None:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000746 return b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000747
748 size = min(size, section.offset + section.size - self.position)
749
750 if isinstance(section, _data):
751 realpos = section.realpos + self.position - section.offset
752 self.fileobj.seek(self.offset + realpos)
753 self.position += size
754 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000755 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000756 self.position += size
757 return NUL * size
758#class _FileInFile
759
760
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       The object is read-only. It keeps its own logical position and
       a small look-ahead buffer that is filled by readline() and
       discarded by seek().
    """
    blocksize = 1024            # chunk size used by readline()

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by tarinfo inside
           the open archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   tarinfo.sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0       # logical read position in the member
        self.buffer = b""       # data read ahead by readline()

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Like other file objects, treat a negative size as "read all"
        # instead of slicing the buffer with a negative index.
        if size is not None and size < 0:
            size = None

        if size is None:
            data = self.buffer + self.fileobj.read()
            self.buffer = b""
        else:
            # Serve the request from the look-ahead buffer first and
            # fetch only the missing part from the underlying file.
            data = self.buffer[:size]
            self.buffer = self.buffer[size:]
            data += self.fileobj.read(size - len(data))

        self.position += len(data)
        return data

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a bytes object with at most that
           size, which may be an incomplete line. None or a negative
           size means no limit.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        end = self.buffer.find(b"\n") + 1
        if end == 0:
            # No newline buffered yet: read ahead block by block until
            # one shows up or the data is exhausted.
            while True:
                chunk = self.fileobj.read(self.blocksize)
                self.buffer += chunk
                if not chunk or b"\n" in chunk:
                    end = self.buffer.find(b"\n") + 1
                    if end == 0:
                        # EOF without newline: return what is left.
                        end = len(self.buffer)
                    break

        if size is not None and size >= 0:
            end = min(size, end)

        line = self.buffer[:end]
        self.buffer = self.buffer[end:]
        self.position += len(line)
        return line

    def readlines(self):
        """Return a list with all remaining lines.
        """
        return list(self)

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file and return the new absolute
           position. The target is clamped to the range 0..size.

           Raises ValueError if the file is closed or whence is not
           one of os.SEEK_SET, os.SEEK_CUR and os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = pos
        elif whence == os.SEEK_CUR:
            target = self.position + pos
        elif whence == os.SEEK_END:
            target = self.size + pos
        else:
            raise ValueError("Invalid argument")

        self.position = min(max(target, 0), self.size)

        # Buffered look-ahead data belongs to the old position.
        self.buffer = b""
        self.fileobj.seek(self.position)
        return self.position

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000897#class ExFileObject
898
899#------------------
900# Exported Classes
901#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
           The mode is masked to its permission bits and the name of a
           directory member is given a trailing slash.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a bytes object of 512 byte blocks.
           format selects one of USTAR_FORMAT, GNU_FORMAT and
           PAX_FORMAT; any other value raises ValueError.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
           Raises ValueError if the linkname is too long or the name
           cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
           Names and linknames that are too long for the header are
           stored in preceding longlink/longname blocks.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part. Raises ValueError if no suitable split
           at a "/" exists.
        """
        # Shorten the candidate prefix until it ends at a slash.
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        prefix = prefix[:-1]    # drop the separating slash

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field placeholder. It must be exactly 8 bytes
            # so that the type flag ends up at offset 156, which is
            # where frombuf() reads it.
            b"        ",
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum field (offset
        # 148) with the octal checksum and a terminating NUL.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        records = b""
        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            base = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length
            # field itself, so iterate until the value is stable.
            n = p = 0
            while True:
                n = base + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raises EmptyHeaderError for an empty buffer,
           TruncatedHeaderError for a buffer that is not BLOCKSIZE
           bytes long, EOFHeaderError for a block of NUL bytes and
           InvalidHeaderError if the checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
           Raises SubsequentHeaderError if the header that follows
           cannot be read.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])

        # Transform the sparse structures to something we can use
        # in ExFileObject.
        self.sparse = _ringbuffer()
        lastpos = 0
        realpos = 0
        for offset, numbytes in structs:
            if offset > lastpos:
                self.sparse.append(_hole(lastpos, offset - lastpos))
            self.sparse.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
        if lastpos < origsize:
            self.sparse.append(_hole(lastpos, origsize - lastpos))

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the size stored in the header here;
        # it is replaced by the original file size afterwards.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.
           Raises InvalidHeaderError for a record with a length of
           zero and SubsequentHeaderError if the header that follows
           cannot be read.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A record that claims a length of zero would never
                # advance pos and make this loop run forever.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
           encoding and errors are accepted but not used here.
        """
        for keyword, value in pax_headers.items():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                # Fall back to 0 for values that cannot be converted.
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    # Predicates that classify the member by its type field.
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1463# class TarInfo
1464
1465class TarFile(object):
1466 """The TarFile Class provides an interface to tar archives.
1467 """
1468
1469 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1470
1471 dereference = False # If true, add content of linked file to the
1472 # tar file, else the link.
1473
1474 ignore_zeros = False # If true, skips empty or invalid blocks and
1475 # continues processing.
1476
Lars Gustäbel365aff32009-12-13 11:42:29 +00001477 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001478 # messages (if debug >= 0). If > 0, errors
1479 # are passed to the caller as exceptions.
1480
Guido van Rossumd8faa362007-04-27 19:54:29 +00001481 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001482
Guido van Rossume7ba4952007-06-06 23:52:48 +00001483 encoding = ENCODING # Encoding for 8-bit character strings.
1484
1485 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001486
Guido van Rossumd8faa362007-04-27 19:54:29 +00001487 tarinfo = TarInfo # The default TarInfo class to use.
1488
1489 fileobject = ExFileObject # The default ExFileObject class to use.
1490
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'.

    `mode' is either 'r' to read from an existing archive, 'a' to
    append data to an existing file or 'w' to create a new file
    overwriting an existing one. `mode' defaults to 'r'. Any other
    value (including the empty string) raises ValueError.

    If `fileobj' is given, it is used for reading or writing data. If
    it has a `mode' attribute, that value replaces the internal file
    mode. `fileobj' is not closed when the TarFile is closed.

    `format', `tarinfo', `dereference', `ignore_zeros', `encoding',
    `debug' and `errorlevel' replace the class-level defaults when they
    are not None. When `errors' is None it becomes "replace" for mode
    'r' and "strict" otherwise. `pax_headers' is only used when the
    resulting format is PAX_FORMAT.

    If setting up the archive fails, a file opened by this constructor
    is closed again before the exception is re-raised.
    """
    # Exact membership: a substring test such as `mode not in "raw"'
    # would accept the empty string.
    file_modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = file_modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes: only override the class defaults when a value
    # was passed explicitly.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding

    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "replace"
    else:
        self.errors = "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except BaseException:
        # Cleanup-and-reraise for every kind of exception: do not leak
        # a file that this constructor opened itself.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001592
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001593 #--------------------------------------------------------------------------
1594 # Below are the classmethods which act as alternate constructors to the
1595 # TarFile class. The open() method is the only one that is needed for
1596 # public use; it is the "super"-constructor and is able to select an
1597 # adequate "sub"-constructor for a particular compression using the mapping
1598 # from OPEN_METH.
1599 #
1600 # This concept allows one to subclass TarFile without losing the comfort of
1601 # the super-constructor. A sub-constructor is registered and made available
1602 # by adding it to the mapping in OPEN_METH.
1603
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener accepts
       the file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact membership: a substring test would let "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except BaseException:
            # The constructor treats the stream as an external file
            # object and will not close it, so release it here.
            stream.close()
            raise
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001676
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

    `mode' must be exactly 'r', 'a' or 'w'; anything else, including
    the empty string, raises ValueError. All arguments are passed on
    to the class constructor unchanged.
    """
    # Exact membership: `mode not in "raw"' is a substring test and
    # would accept "".
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001684
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is unavailable and ReadError
       if the data cannot be read as a gzip file. The GzipFile created
       here is closed again when opening the archive fails.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    extfileobj = fileobj is not None
    try:
        # Without a file object GzipFile opens `name' itself and then
        # also closes it in its own close(); a file that is opened
        # separately and passed in is never closed by GzipFile.
        gzfile = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except IOError:
        if extfileobj:
            raise ReadError("not a gzip file")
        # Failing to open `name' is reported as the plain I/O error.
        raise

    try:
        t = cls.taropen(name, mode, gzfile, **kwargs)
    except IOError:
        gzfile.close()
        raise ReadError("not a gzip file")
    except BaseException:
        gzfile.close()
        raise
    t._extfileobj = False
    return t
1710
Guido van Rossum75b64e62005-01-16 00:16:11 +00001711 @classmethod
Guido van Rossumd8faa362007-04-27 19:54:29 +00001712 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001713 """Open bzip2 compressed tar archive name for reading or writing.
1714 Appending is not allowed.
1715 """
1716 if len(mode) > 1 or mode not in "rw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001717 raise ValueError("mode must be 'r' or 'w'.")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001718
1719 try:
1720 import bz2
1721 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001722 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001723
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001724 if fileobj is not None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001725 fileobj = _BZ2Proxy(fileobj, mode)
1726 else:
1727 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001728
1729 try:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001730 t = cls.taropen(name, mode, fileobj, **kwargs)
Lars Gustäbel9520a432009-11-22 18:48:49 +00001731 except (IOError, EOFError):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001732 raise ReadError("not a bzip2 file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001733 t._extfileobj = False
1734 return t
1735
# All *open() methods are registered here: compression type -> name of
# the classmethod that opens archives of that type.
OPEN_METH = dict(
    tar="taropen",      # uncompressed tar
    gz="gzopen",        # gzip compressed tar
    bz2="bz2open",      # bzip2 compressed tar
)
1742
1743 #--------------------------------------------------------------------------
1744 # The public methods which TarFile provides:
1745
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed unless it was supplied by the
       caller, and this also happens when writing the final blocks
       fails.
    """
    if self.closed:
        return

    # Mark the archive closed up front so that a failing write below
    # cannot leave it half-open.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1765
def getmember(self, name):
    """Return the TarInfo object for member `name'.

       The lookup is delegated to _getmember(); if it finds nothing,
       KeyError is raised. If a member occurs more than once in the
       archive, its last occurrence is assumed to be the most
       up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001776
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # To obtain a list of all members, the whole archive has to be
    # scanned first.
    self._load()
    return self.members
1786
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001792
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that is not handled here
       (neither regular file, directory, fifo, symbolic link, character
       device nor block device).
    """
    self._check("aw")

    # A given file object determines the real file name.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: drop any drive prefix, convert the
    # platform separator to forward slashes and make the path relative.
    if arcname is None:
        arcname = name
    drv, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Create the object that is filled in below.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Use os.fstat for file objects; otherwise os.lstat where it exists
    # and links shall not be followed, else os.stat.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)
    linkname = ""

    mode_bits = st.st_mode
    if stat.S_ISREG(mode_bits):
        inode = (st.st_ino, st.st_dev)
        is_known_hardlink = (not self.dereference and st.st_nlink > 1 and
                             inode in self.inodes and
                             arcname != self.inodes[inode])
        if is_known_hardlink:
            # A hardlink to a file that was archived already.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is remembered only if it is valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(mode_bits):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(mode_bits):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(mode_bits):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(mode_bits):
        member_type = CHRTYPE
    elif stat.S_ISBLK(mode_bits):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything the stat result provides into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = mode_bits
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if stat.S_ISREG(mode_bits) else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname

    # Symbolic owner names are optional: the modules may be missing
    # and the ids may be unknown to the system.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(st.st_rdev)
            tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1890
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            print(filemode(member.mode), end=' ')
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            print(owner, end=' ')
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            timestamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % timestamp, end=' ')

        # Directories are marked with a trailing slash.
        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001919
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded (deprecated, a
       DeprecationWarning is issued when it is used). `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.

       The archive itself and files of unsupported type are skipped
       with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # The with-block closes the source file even when addfile()
        # raises.
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
1981
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       A shallow copy of `tarinfo' is what gets stored in the member
       list; the caller's object is left alone.
    """
    self._check("aw")

    tarinfo = copy.copy(tarinfo)

    # Write the header block(s) first.
    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last block
    # with NULs up to a multiple of BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002007
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       NOTE(review): member names are joined onto `path' as they are;
       nothing in this method rejects absolute names or names
       containing "..".
    """
    pending_dirs = []

    for tarinfo in (self if members is None else members):
        if tarinfo.isdir():
            # Extract directories with a safe mode; the real attributes
            # are applied in the second pass below.
            pending_dirs.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path)

    # Walk the directories in reverse name order.
    by_name = sorted(pending_dirs, key=lambda entry: entry.name)

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in reversed(by_name):
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2044
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.

       Whether errors are raised or only reported as debug messages
       depends on `errorlevel': EnvironmentError is raised when it is
       greater than 0, ExtractError when it is greater than 1.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2077
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Regular files, and members of unknown type (which are treated as
    # regular files), get a file object directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
2116
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.
    """
    # Build the destination pathname: drop trailing slashes and replace
    # forward slashes with the platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create all upper directories. Directories that are not part of
    # the archive get default permissions.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method for the member type, then call it once.
    if tarinfo.isreg():
        maker = self.makefile
    elif tarinfo.isdir():
        maker = self.makedir
    elif tarinfo.isfifo():
        maker = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        maker = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        maker = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        maker = self.makeunknown
    else:
        maker = self.makefile
    maker(tarinfo, targetpath)

    # Restore owner, then (except for symbolic links) mode and mtime.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2158
2159 #--------------------------------------------------------------------------
2160 # Below are the different file methods. They are called via
2161 # _extract_member() when extract() is called. They can be replaced in a
2162 # subclass to implement other functionality.
2163
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

       An already existing targetpath is not an error; every other
       EnvironmentError raised by os.mkdir() is passed on.
    """
    try:
        # The directory is created with mode 0o700 here; chmod() is
        # applied afterwards by _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2174
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained from extractfile() and copied
       into targetpath, which is opened for binary writing. Both file
       objects are closed even if opening the target or copying the
       data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # bltn_open is the builtin open(); the module-level name
        # open refers to TarFile.open.
        with bltn_open(targetpath, "wb") as target:
            copyfileobj(source, target)
    finally:
        source.close()
2183
def makeunknown(self, tarinfo, targetpath):
    """Make a file at targetpath from a TarInfo object whose type
       is unknown.

       The member is written with makefile() and a level 1 debug
       message naming the type is emitted afterwards.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
2191
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

       Raise ExtractError if the os module provides no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002199
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

       The device node is created with os.mknod() using tarinfo.mode
       combined with the block or character device type bit, and the
       device number built from tarinfo.devmajor and tarinfo.devminor.
       Raise ExtractError if os.mknod() or os.makedev() is missing.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Everything that is not a block device is created as a
    # character device.
    mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2214
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       The fallback is taken when creating the link raises
       AttributeError. It first tries to extract the archive member the
       link refers to as targetpath; if that fails with
       EnvironmentError or KeyError, the referenced path is copied from
       the filesystem with shutil.copy2(). Raise IOError if that copy
       fails as well.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # A symbolic link's target is relative to the directory
            # of the link itself. Only non-empty parts are joined, so
            # that a link at the top level of the archive resolves to
            # "target" and not to the absolute-looking "/target".
            linkpath = "/".join(filter(None, (os.path.dirname(tarinfo.name),
                                              tarinfo.linkname)))
        else:
            linkpath = tarinfo.linkname

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # The target could not be extracted from the archive; try
            # to copy it from the filesystem instead.
            linkpath = linkpath.replace("/", os.sep)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002241
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by numeric id; if both lookups fail the current
       process's gid/uid is used. Raise ExtractError if changing the
       owner fails with an EnvironmentError.
    """
    # We have to be root to do so.
    if not pwd or not hasattr(os, "geteuid") or os.geteuid() != 0:
        return

    try:
        group = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            group = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            group = os.getgid()

    try:
        owner = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            owner = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            owner = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, owner, group)
        elif sys.platform != "os2emx":
            os.chown(targetpath, owner, group)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002269
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       Nothing is done if the os module provides no chmod(). Raise
       ExtractError if os.chmod() fails with an EnvironmentError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002278
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       tarinfo.mtime is used for both the access and the modification
       time. Nothing is done if the os module provides no utime().
       Raise ExtractError if os.utime() fails with an EnvironmentError.
    """
    if hasattr(os, 'utime'):
        try:
            stamp = tarinfo.mtime
            os.utime(targetpath, (stamp, stamp))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002288
2289 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member returned from reading the archive is appended to
       self.members; when no further member is found, self._loaded is
       set. Raise ReadError for an empty file or a bad header at offset
       0, and for any SubsequentHeaderError.
    """
    self._check("ra")

    # A stored first member is handed out exactly once before any
    # further reading takes place.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    member = None
    while True:
        try:
            member = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                # Skip this block and try the following one.
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        # Either a member was read or a header error that is not
        # re-raised ended the archive; member is still None then.
        break

    if member is None:
        self._loaded = True
    else:
        self.members.append(member)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002335
2336 #--------------------------------------------------------------------------
2337 # Little helper methods:
2338
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.
       If tarinfo is given, it is used as the starting point.

       Only members located before tarinfo in the member list are
       searched in that case. Return None if no member matches.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        candidates = members
    else:
        candidates = members[:members.index(tarinfo)]

    # Search backwards so that the last matching member wins.
    for member in reversed(candidates):
        if name == member.name:
            return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002354
def _load(self):
    """Read through the entire archive file and look for readable
       members.

       next() is called until it returns None, then the archive is
       marked as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2364
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode.

       mode is a container of allowed mode characters, or None to skip
       the mode check. Raise IOError if either check fails.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002373
def __iter__(self):
    """Provide an iterator object.

       Once the archive is marked as loaded the member list is iterated
       directly; otherwise a TarIter is returned.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2381
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       msg is printed only if level does not exceed self.debug.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002387
def __enter__(self):
    """Enter the runtime context: check the archive with _check()
       and return the TarFile object itself.
    """
    self._check()
    return self
2391
def __exit__(self, type, value, traceback):
    """Leave the runtime context.

       Without an exception the archive is closed with close(). After
       an exception only the underlying file object is closed (unless
       self._extfileobj is set) and the archive is marked as closed.
    """
    if type is not None:
        # An exception occurred. We must not call close() because
        # it would try to write end-of-archive blocks and padding.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        return
    self.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002401# class TarFile
2402
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the given TarFile.
        """
        self.tarfile = tarfile
        # Number of members handed out so far.
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances it can
            # happen that getmembers() is called during iteration,
            # which will cause TarIter to stop prematurely. Once the
            # archive is loaded, continue from the member list at the
            # position reached so far.
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2438
2439# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.

       A section starts at offset and is size units long; the test
       "x in section" is true for offset <= x < offset + size.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        start = self.offset
        return start <= offset < start + self.size
2448
class _data(_section):
    """Represent a data section in a sparse file.

       In addition to offset and size, realpos is stored unchanged.
       NOTE(review): realpos is presumably the position of this
       section's data within the stored member data - confirm against
       the code that reads sparse members.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2455
class _hole(_section):
    """Represent a hole section in a sparse file.

       Adds nothing to _section; the class only serves to tell holes
       and data sections apart.
    """
2460
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       The index of the most recently found item is kept in self.idx
       and the next search starts there, wrapping around at the end of
       the list.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item for which "offset in item" is true,
           or None if no item contains offset (or the buffer is empty).
           On success self.idx is moved to the item found.
        """
        # Nothing to search: without this guard self[idx] below would
        # raise IndexError instead of reporting "not found".
        if not self:
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File: every item was checked once.
                return None
        self.idx = idx
        return item
2481
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002482#--------------------
2483# exported functions
2484#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened with the module-level open() and closed
       again right away; a TarError means it cannot be handled. Other
       exceptions are not caught.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2495
# Keep a reference to the builtin open() under the name bltn_open, because
# the module-level name open is rebound to TarFile.open on the next line.
bltn_open = open
open = TarFile.open