blob: 8559e96fada60a45e8729e4dce4674b312b94193 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32__version__ = "$Revision$"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000033
Guido van Rossumd8faa362007-04-27 19:54:29 +000034version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000035__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000036__date__ = "$Date$"
37__cvsid__ = "$Id$"
Guido van Rossum98297ee2007-11-06 21:34:58 +000038__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000039
40#---------
41# Imports
42#---------
43import sys
44import os
45import shutil
46import stat
47import errno
48import time
49import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000050import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000051import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000052
53try:
54 import grp, pwd
55except ImportError:
56 grp = pwd = None
57
58# from tarfile import *
59__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
60
Georg Brandl1a3284e2007-12-02 09:40:06 +000061from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000062
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000063#---------------------------------------------------------
64# tar constants
65#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# The magic and version fields of a header occupy 8 bytes together, so
# both magic strings below have to be exactly 8 bytes long.  The GNU
# variant is "ustar" followed by two spaces and a null byte.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one-byte type flag of a member header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Identifiers of the archive formats that can be written.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000098
99#---------------------------------------------------------
100# tarfile constants
101#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Each key maps to the callable that
# converts the field's text into the number.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
134
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000135#---------------------------------------------------------
136# Bits used in the mode field, values in octal.
137#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

# Special permission bits.
TSUID   = 0o4000          # set UID on execution
TSGID   = 0o2000          # set GID on execution
TSVTX   = 0o1000          # reserved

# Permission bits for owner, group and other.
TUREAD  = 0o400           # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC  = 0o100           # execute/search by owner
TGREAD  = 0o040           # read by group
TGWRITE = 0o020           # write by group
TGEXEC  = 0o010           # execute/search by group
TOREAD  = 0o004           # read by other
TOWRITE = 0o002           # write by other
TOEXEC  = 0o001           # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000158
159#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000160# initialization
161#---------------------------------------------------------
# Module-wide encoding, taken from the interpreter's file system
# encoding; fall back to ASCII if the interpreter reports none.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = "ascii"
Guido van Rossumd8faa362007-04-27 19:54:29 +0000165
166#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000167# Some useful functions
168#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000169
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a bytes field of exactly
       length bytes: a longer value is cut off, a shorter one is
       padded with null bytes.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so no padding is added when
    # the encoded value already fills (or overflows) the field.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000175
def nts(s, encoding, errors):
    """Decode a bytes field to a string, ignoring the first null
       byte and everything that follows it.
    """
    # partition() returns the whole field as the head when there is
    # no null byte at all.
    head, _sep, _rest = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000183
def nti(s):
    """Convert a number field to a python number.

       s is the raw bytes of the field.  Raises InvalidHeaderError
       if an octal field does not contain a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    # Indexing a bytes object yields an int, so the marker byte has to
    # be compared with an int (comparing with chr() is always unequal
    # and would make the base-256 branch unreachable).
    if s[0] != 0o200:
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        # GNU encoding: the bytes after the marker are a big-endian
        # representation of the number.
        n = 0
        for byte in s[1:]:
            n = (n << 8) + byte
    return n
200
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

       Raises ValueError if the number does not fit into the field
       using the encodings that format allows.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, add the marker byte
    # and reverse everything into big-endian order.
    field = bytearray()
    for _ in range(width):
        field.append(n & 0o377)
        n >>= 8
    field.append(0o200)
    field.reverse()
    return field
227
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

       All bytes of the 512 byte header are summed up except for the
       chksum field (bytes 148 to 155), which is treated as if it was
       filled with spaces. According to the GNU tar sources, some tars
       (Sun and NeXT) calculate chksum with signed char, which will be
       different if there are bytes in the buffer with the high bit
       set. So both variants are calculated.
    """
    before = buf[:148]
    after = buf[156:512]
    # 256 is the sum of the eight spaces (8 * 0x20) that stand in for
    # the chksum field.
    spaces = 256
    unsigned_chksum = (spaces
                       + sum(struct.unpack("148B", before))
                       + sum(struct.unpack("356B", after)))
    signed_chksum = (spaces
                     + sum(struct.unpack("148b", before))
                     + sum(struct.unpack("356b", after)))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000240
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content. Raise IOError
       if src ends before length bytes could be read.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def transfer(count):
        # Move exactly count bytes or fail, a short read means that
        # src ended prematurely.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)
    for _ in range(blocks):
        transfer(BUFSIZE)
    if remainder:
        transfer(remainder)
    return
265
# Lookup table used by filemode(). It holds one inner tuple for each
# of the ten characters of the mode string; an inner tuple lists the
# (bits, character) candidates for that position in order of priority.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner permissions, the combined set-UID entry comes first
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group permissions, the combined set-GID entry comes first
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # permissions of others, the combined TSVTX entry comes first
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000292
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def symbol(candidates):
        # The first candidate whose bits are all set in mode wins,
        # a position without a match is shown as a dash.
        for bits, char in candidates:
            if mode & bits == bits:
                return char
        return "-"

    return "".join(symbol(candidates) for candidates in filemode_table)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000307
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""

class HeaderError(TarError):
    """Base exception for header errors."""

class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""

class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""

class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""

class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""

class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000341
342#---------------------------
343# internal stream interface
344#---------------------------
345class _LowLevelFile:
346 """Low-level file object. Supports reading and writing.
347 It is used instead of a regular file object for streaming
348 access.
349 """
350
351 def __init__(self, name, mode):
352 mode = {
353 "r": os.O_RDONLY,
354 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
355 }[mode]
356 if hasattr(os, "O_BINARY"):
357 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000358 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000359
360 def close(self):
361 os.close(self.fd)
362
363 def read(self, size):
364 return os.read(self.fd, size)
365
366 def write(self, s):
367 os.write(self.fd, s)
368
369class _Stream:
370 """Class that serves as an adapter between TarFile and
371 a stream-like object. The stream-like object only
372 needs to have a read() or write() method and is accessed
373 blockwise. Use of gzip or bzip2 compression is possible.
374 A stream-like object could be for example: sys.stdin,
375 sys.stdout, a socket, a tape device etc.
376
377 _Stream is intended to be used only internally.
378 """
379
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000380 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000381 """Construct a _Stream object.
382 """
383 self._extfileobj = True
384 if fileobj is None:
385 fileobj = _LowLevelFile(name, mode)
386 self._extfileobj = False
387
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000388 if comptype == '*':
389 # Enable transparent compression detection for the
390 # stream interface
391 fileobj = _StreamProxy(fileobj)
392 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000393
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000394 self.name = name or ""
395 self.mode = mode
396 self.comptype = comptype
397 self.fileobj = fileobj
398 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000399 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000400 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000401 self.closed = False
402
403 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000404 try:
405 import zlib
406 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000407 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000408 self.zlib = zlib
Antoine Pitrou77b338b2009-12-14 18:00:06 +0000409 self.crc = zlib.crc32(b"")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000410 if mode == "r":
411 self._init_read_gz()
412 else:
413 self._init_write_gz()
414
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000415 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000416 try:
417 import bz2
418 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000419 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000420 if mode == "r":
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000421 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000422 self.cmp = bz2.BZ2Decompressor()
423 else:
424 self.cmp = bz2.BZ2Compressor()
425
426 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000427 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000428 self.close()
429
430 def _init_write_gz(self):
431 """Initialize for writing with gzip compression.
432 """
433 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
434 -self.zlib.MAX_WBITS,
435 self.zlib.DEF_MEM_LEVEL,
436 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000437 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000438 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000439 if self.name.endswith(".gz"):
440 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000441 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
442 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000443
444 def write(self, s):
445 """Write string s to the stream.
446 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000447 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000448 self.crc = self.zlib.crc32(s, self.crc)
449 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000450 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000451 s = self.cmp.compress(s)
452 self.__write(s)
453
454 def __write(self, s):
455 """Write string s to the stream if a whole new block
456 is ready to be written.
457 """
458 self.buf += s
459 while len(self.buf) > self.bufsize:
460 self.fileobj.write(self.buf[:self.bufsize])
461 self.buf = self.buf[self.bufsize:]
462
463 def close(self):
464 """Close the _Stream object. No operation should be
465 done on it afterwards.
466 """
467 if self.closed:
468 return
469
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000470 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000471 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000472
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000473 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000474 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000475 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000476 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000477 # The native zlib crc is an unsigned 32-bit integer, but
478 # the Python wrapper implicitly casts that to a signed C
479 # long. So, on a 32-bit box self.crc may "look negative",
480 # while the same crc on a 64-bit box may "look positive".
481 # To avoid irksome warnings from the `struct` module, force
482 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000483 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
484 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000485
486 if not self._extfileobj:
487 self.fileobj.close()
488
489 self.closed = True
490
491 def _init_read_gz(self):
492 """Initialize for reading a gzip compressed fileobj.
493 """
494 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000495 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000496
497 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000498 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000499 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000500 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000501 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000502
503 flag = ord(self.__read(1))
504 self.__read(6)
505
506 if flag & 4:
507 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
508 self.read(xlen)
509 if flag & 8:
510 while True:
511 s = self.__read(1)
512 if not s or s == NUL:
513 break
514 if flag & 16:
515 while True:
516 s = self.__read(1)
517 if not s or s == NUL:
518 break
519 if flag & 2:
520 self.__read(2)
521
522 def tell(self):
523 """Return the stream's file pointer position.
524 """
525 return self.pos
526
527 def seek(self, pos=0):
528 """Set the stream's file pointer to pos. Negative seeking
529 is forbidden.
530 """
531 if pos - self.pos >= 0:
532 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000533 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000534 self.read(self.bufsize)
535 self.read(remainder)
536 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000537 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000538 return self.pos
539
540 def read(self, size=None):
541 """Return the next size number of bytes from the stream.
542 If size is not defined, return all bytes of the stream
543 up to EOF.
544 """
545 if size is None:
546 t = []
547 while True:
548 buf = self._read(self.bufsize)
549 if not buf:
550 break
551 t.append(buf)
552 buf = "".join(t)
553 else:
554 buf = self._read(size)
555 self.pos += len(buf)
556 return buf
557
558 def _read(self, size):
559 """Return size bytes from the stream.
560 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000561 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000562 return self.__read(size)
563
564 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000565 while c < size:
566 buf = self.__read(self.bufsize)
567 if not buf:
568 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000569 try:
570 buf = self.cmp.decompress(buf)
571 except IOError:
572 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000573 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000574 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000575 buf = self.dbuf[:size]
576 self.dbuf = self.dbuf[size:]
577 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000578
579 def __read(self, size):
580 """Return size bytes from stream. If internal buffer is empty,
581 read another block from the stream.
582 """
583 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000584 while c < size:
585 buf = self.fileobj.read(self.bufsize)
586 if not buf:
587 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000588 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000589 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000590 buf = self.buf[:size]
591 self.buf = self.buf[size:]
592 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000593# class _Stream
594
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000595class _StreamProxy(object):
596 """Small proxy class that enables transparent compression
597 detection for the Stream interface (mode 'r|*').
598 """
599
600 def __init__(self, fileobj):
601 self.fileobj = fileobj
602 self.buf = self.fileobj.read(BLOCKSIZE)
603
604 def read(self, size):
605 self.read = self.fileobj.read
606 return self.buf
607
608 def getcomptype(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000609 if self.buf.startswith(b"\037\213\010"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000610 return "gz"
Lars Gustäbela280ca752007-08-28 07:34:33 +0000611 if self.buf.startswith(b"BZh91"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000612 return "bz2"
613 return "tar"
614
615 def close(self):
616 self.fileobj.close()
617# class StreamProxy
618
Thomas Wouters477c8d52006-05-27 19:21:47 +0000619class _BZ2Proxy(object):
620 """Small proxy class that enables external file object
621 support for "r:bz2" and "w:bz2" modes. This is actually
622 a workaround for a limitation in bz2 module's BZ2File
623 class which (unlike gzip.GzipFile) has no support for
624 a file object argument.
625 """
626
627 blocksize = 16 * 1024
628
629 def __init__(self, fileobj, mode):
630 self.fileobj = fileobj
631 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000632 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000633 self.init()
634
635 def init(self):
636 import bz2
637 self.pos = 0
638 if self.mode == "r":
639 self.bz2obj = bz2.BZ2Decompressor()
640 self.fileobj.seek(0)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000641 self.buf = b""
Thomas Wouters477c8d52006-05-27 19:21:47 +0000642 else:
643 self.bz2obj = bz2.BZ2Compressor()
644
645 def read(self, size):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000646 x = len(self.buf)
647 while x < size:
Lars Gustäbel42e00912009-03-22 20:34:29 +0000648 raw = self.fileobj.read(self.blocksize)
649 if not raw:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000650 break
Lars Gustäbel42e00912009-03-22 20:34:29 +0000651 data = self.bz2obj.decompress(raw)
652 self.buf += data
Thomas Wouters477c8d52006-05-27 19:21:47 +0000653 x += len(data)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000654
655 buf = self.buf[:size]
656 self.buf = self.buf[size:]
657 self.pos += len(buf)
658 return buf
659
660 def seek(self, pos):
661 if pos < self.pos:
662 self.init()
663 self.read(pos - self.pos)
664
665 def tell(self):
666 return self.pos
667
668 def write(self, data):
669 self.pos += len(data)
670 raw = self.bz2obj.compress(data)
671 self.fileobj.write(raw)
672
673 def close(self):
674 if self.mode == "w":
675 raw = self.bz2obj.flush()
676 self.fileobj.write(raw)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000677# class _BZ2Proxy
678
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000679#------------------------
680# Extraction file object
681#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000682class _FileInFile(object):
683 """A thin wrapper around an existing file object that
684 provides a part of its data as an individual file
685 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000686 """
687
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000688 def __init__(self, fileobj, offset, size, sparse=None):
689 self.fileobj = fileobj
690 self.offset = offset
691 self.size = size
692 self.sparse = sparse
693 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000694
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000695 def seekable(self):
696 if not hasattr(self.fileobj, "seekable"):
697 # XXX gzip.GzipFile and bz2.BZ2File
698 return True
699 return self.fileobj.seekable()
700
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000701 def tell(self):
702 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000703 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000704 return self.position
705
706 def seek(self, position):
707 """Seek to a position in the file.
708 """
709 self.position = position
710
711 def read(self, size=None):
712 """Read data from the file.
713 """
714 if size is None:
715 size = self.size - self.position
716 else:
717 size = min(size, self.size - self.position)
718
719 if self.sparse is None:
720 return self.readnormal(size)
721 else:
722 return self.readsparse(size)
723
724 def readnormal(self, size):
725 """Read operation for regular files.
726 """
727 self.fileobj.seek(self.offset + self.position)
728 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000729 return self.fileobj.read(size)
730
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000731 def readsparse(self, size):
732 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000733 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000734 data = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000735 while size > 0:
736 buf = self.readsparsesection(size)
737 if not buf:
738 break
739 size -= len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000740 data += buf
741 return data
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000742
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000743 def readsparsesection(self, size):
744 """Read a single section of a sparse file.
745 """
746 section = self.sparse.find(self.position)
747
748 if section is None:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000749 return b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000750
751 size = min(size, section.offset + section.size - self.position)
752
753 if isinstance(section, _data):
754 realpos = section.realpos + self.position - section.offset
755 self.fileobj.seek(self.offset + realpos)
756 self.position += size
757 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000758 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000759 self.position += size
760 return NUL * size
761#class _FileInFile
762
763
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       Reads go through an internal bytes buffer (self.buffer) that is
       filled by readline(); self.position is the logical offset of the
       next byte that will be handed to the caller.
    """
    # Number of bytes readline() requests from the underlying file at a time.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Wrap the data area of member tarinfo inside tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   tarinfo.sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = b""

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "no limit", like for other file objects.
        # Without this, a negative value would be used as a slice index on
        # the buffer and passed on to the underlying reader.
        if size is not None and size < 0:
            size = None

        buf = b""
        if self.buffer:
            # Serve data that readline() has already pulled in first.
            if size is None:
                buf = self.buffer
                self.buffer = b""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. None or a negative
           size means no limit.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # no newline found.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line length.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position is
           clamped to the range 0..self.size.

           Raises ValueError if the file is closed or whence is not one
           of os.SEEK_SET, os.SEEK_CUR or os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered data belongs to the old position, so drop it.
        self.buffer = b""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
901
902#------------------
903# Exported Classes
904#------------------
905class TarInfo(object):
906 """Informational class which holds the details about an
907 archive member given by a tar header block.
908 TarInfo objects are returned by TarFile.getmember(),
909 TarFile.getmembers() and TarFile.gettarinfo() and are
910 usually created internally.
911 """
912
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000913 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
914 "chksum", "type", "linkname", "uname", "gname",
915 "devmajor", "devminor",
916 "offset", "offset_data", "pax_headers", "sparse",
917 "tarfile", "_sparse_structs", "_link_target")
918
def __init__(self, name=""):
    """Create a TarInfo object with default attribute values.
       name is the optional name of the member.
    """
    # Identity and ownership.
    self.name = name                    # member name
    self.linkname = ""                  # link name
    self.uid = self.gid = 0             # user id, group id
    self.uname = self.gname = "root"    # user name, group name

    # File metadata.
    self.mode = 0o644                   # file permissions
    self.type = REGTYPE                 # member type
    self.size = 0                       # file size
    self.mtime = 0                      # modification time
    self.chksum = 0                     # header checksum
    self.devmajor = self.devminor = 0   # device major/minor number

    # Positions inside the archive.
    self.offset = 0                     # the tar header starts here
    self.offset_data = 0                # the file's data starts here

    # Extended information.
    self.sparse = None                  # sparse member information
    self.pax_headers = {}               # pax header information
942
# The pax format calls the "name" and "linkname" fields "path" and
# "linkpath". Both spellings refer to the same underlying attribute.
def _getpath(self):
    """Return the member name."""
    return self.name
def _setpath(self, value):
    """Set the member name."""
    self.name = value
path = property(fget=_getpath, fset=_setpath)

def _getlinkpath(self):
    """Return the link name."""
    return self.linkname
def _setlinkpath(self, value):
    """Set the link name."""
    self.linkname = value
linkpath = property(fget=_getlinkpath, fset=_setlinkpath)
956
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000957 def __repr__(self):
958 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
959
def get_info(self):
    """Return the TarInfo's attributes as a dictionary.

       The mode is reduced to its lower 12 bits, and the name of a
       directory member always ends with a slash.
    """
    fields = ("name", "mode", "uid", "gid", "size", "mtime", "chksum",
              "type", "linkname", "uname", "gname", "devmajor", "devminor")
    info = dict((field, getattr(self, field)) for field in fields)

    # Only the permission bits go into the header.
    info["mode"] &= 0o7777

    if info["type"] == DIRTYPE and not info["name"].endswith("/"):
        info["name"] += "/"

    return info
983
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Return a tar header as a string of 512 byte blocks.

       format selects the header flavour and must be USTAR_FORMAT,
       GNU_FORMAT or PAX_FORMAT; any other value raises ValueError.
       errors is not passed on for PAX_FORMAT.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding)

    raise ValueError("invalid format")
997
def create_ustar_header(self, info, encoding, errors):
    """Return the object as a ustar header block.

       info is modified in place: "magic" is set, and a name longer
       than LENGTH_NAME is split into "prefix" and "name".
       Raises ValueError if the linkname exceeds LENGTH_LINK or the
       name cannot be split.
    """
    info["magic"] = POSIX_MAGIC

    linkname = info["linkname"]
    if len(linkname) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    fullname = info["name"]
    if len(fullname) > LENGTH_NAME:
        prefix, shortname = self._posix_split_name(fullname)
        info["prefix"] = prefix
        info["name"] = shortname

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001010
def create_gnu_header(self, info, encoding, errors):
    """Return the object as a GNU header block sequence.

       A long-link and/or long-name sequence is put in front of the
       regular header when the respective field exceeds its limit.
    """
    info["magic"] = GNU_MAGIC

    # The longlink sequence comes first, then the longname sequence.
    oversized = (
        ("linkname", LENGTH_LINK, GNUTYPE_LONGLINK),
        ("name", LENGTH_NAME, GNUTYPE_LONGNAME),
    )
    blocks = []
    for field, limit, longtype in oversized:
        if len(info[field]) > limit:
            blocks.append(self._create_gnu_long_header(info[field], longtype,
                                                       encoding, errors))

    blocks.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(blocks)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001024
def create_pax_header(self, info, encoding):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplemental information.

       info is modified in place: "magic" is set and number fields
       that are moved into the pax header are reset to 0.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header if they are not pure ASCII
    # or longer than the space the ustar header offers.
    string_fields = (
        ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32), ("gname", "gname", 32))
    for name, hname, length in string_fields:
        if hname in pax_headers:
            # The pax header has priority.
            continue

        text = info[name]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            needs_record = True
        else:
            needs_record = len(text) > length

        if needs_record:
            pax_headers[hname] = text

    # Number fields go into the pax header if they are out of range
    # for the octal field or are floats.
    for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
        if name not in pax_headers:
            val = info[name]
            if 0 <= val < 8 ** (digits - 1) and not isinstance(val, float):
                # Fits into the ustar header as it is.
                continue
            pax_headers[name] = str(val)

        # The pax header has priority. Avoid overflow in the ustar field.
        info[name] = 0

    # Create a pax extended header if necessary.
    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
    else:
        extended = b""

    return extended + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001073
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return a pax global header block sequence built from the
       pax_headers dictionary. The records are encoded as UTF-8.
    """
    global_header = cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
    return global_header
Guido van Rossumd8faa362007-04-27 19:54:29 +00001079
def _posix_split_name(self, name):
    """Split a name longer than 100 chars into a prefix
       and a name part at a slash.

       The split happens at the last slash within the first
       LENGTH_PREFIX + 1 characters. Raises ValueError if that leaves
       no prefix or a name part longer than LENGTH_NAME.
    """
    cut = name.rfind("/", 0, LENGTH_PREFIX + 1)

    # Without a usable slash (cut == -1) the prefix stays empty and
    # the name stays whole.
    prefix = name[:max(cut, 0)]
    name = name[cut + 1:]

    if len(name) > LENGTH_NAME or not prefix:
        raise ValueError("name is too long")
    return prefix, name
1094
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       String fields are written with stn() and number fields with
       itn(); both helpers are defined elsewhere in this module.
       Missing keys in info fall back to the defaults given below.
    """
    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # The checksum field is 8 bytes wide (frombuf() reads it from
        # buf[148:156]). It is filled with spaces while the checksum
        # is calculated; a shorter placeholder would shift every field
        # after it.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", "root"), 32, encoding, errors),
        stn(info.get("gname", "root"), 32, encoding, errors),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # Pad the joined fields with NUL bytes up to a full block.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first 7 bytes of the checksum field with six octal
    # digits and a NUL; the 8th byte keeps its space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
1122
@staticmethod
def _create_payload(payload):
    """Return payload padded with NUL bytes up to the next
       multiple of BLOCKSIZE. A payload that already ends on a
       block border is returned as it is.
    """
    leftover = len(payload) % BLOCKSIZE
    if leftover:
        payload += NUL * (BLOCKSIZE - leftover)
    return payload
1132
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       for name: a header block followed by the NUL-terminated,
       encoded name padded to full blocks.
    """
    encoded = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(encoded),
        "magic": GNU_MAGIC,
    }

    # create extended header + name blocks.
    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(encoded)
1149
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Return a POSIX.1-2008 extended or global header sequence
       that contains a list of keyword, value pairs. The values
       must be strings.

       type is the header type to write; encoding is only used for
       the values when the header has to be marked as binary.
    """
    # If any value cannot be encoded as strict UTF-8 (e.g. it contains
    # surrogate characters), the whole header is written with
    # hdrcharset=BINARY, see _proc_pax() for more information.
    binary = False
    for text in pax_headers.values():
        try:
            text.encode("utf8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    chunks = []
    if binary:
        # Put the hdrcharset field at the beginning of the header.
        chunks.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        raw_key = keyword.encode("utf8")
        if binary:
            # Try to restore the original byte representation of `value'.
            # Needless to say, that the encoding must match the string.
            raw_value = value.encode(encoding, "surrogateescape")
        else:
            raw_value = value.encode("utf8")

        # The record length includes its own decimal representation, so
        # iterate until the total no longer changes.
        base = len(raw_key) + len(raw_value) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate
        chunks.append(bytes(str(total), "ascii") + b" " + raw_key + b"=" +
                      raw_value + b"\n")

    records = b"".join(chunks)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Create pax header + record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1200
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Construct a TarInfo object from a 512 byte bytes object.

       encoding and errors are passed to nts() to decode the string
       fields (name, linkname, uname, gname and prefix).

       Raises EmptyHeaderError if buf is empty, TruncatedHeaderError
       if its length is not BLOCKSIZE, EOFHeaderError if it consists
       of NUL bytes only and InvalidHeaderError if the stored checksum
       is not among the values returned by calc_chksums().
    """
    if len(buf) == 0:
        raise EmptyHeaderError("empty header")
    if len(buf) != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    # Validate the header before any field is interpreted.
    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    # Every field lives at a fixed byte offset within the block.
    obj = cls()
    obj.name = nts(buf[0:100], encoding, errors)
    obj.mode = nti(buf[100:108])
    obj.uid = nti(buf[108:116])
    obj.gid = nti(buf[116:124])
    obj.size = nti(buf[124:136])
    obj.mtime = nti(buf[136:148])
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = nts(buf[157:257], encoding, errors)
    obj.uname = nts(buf[265:297], encoding, errors)
    obj.gname = nts(buf[297:329], encoding, errors)
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500], encoding, errors)

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # The old GNU sparse format occupies some of the unused
    # space in the buffer for up to 4 sparse structures.
    # Save them for later processing in _proc_sparse().
    if obj.type == GNUTYPE_SPARSE:
        pos = 386
        structs = []
        for i in range(4):
            # Each structure is a 12 byte offset followed by a
            # 12 byte size; stop at the first one nti() rejects.
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            structs.append((offset, numbytes))
            pos += 24
        # A non-zero byte at 482 makes _proc_sparse() read further
        # blocks of sparse structures.
        isextended = bool(buf[482])
        origsize = nti(buf[483:495])
        obj._sparse_structs = (structs, isextended, origsize)

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1263
@classmethod
def fromtarfile(cls, tarfile):
    """Read the next header block from the TarFile object tarfile
       and return the TarInfo object that results from processing it.
    """
    stream = tarfile.fileobj
    header = stream.read(BLOCKSIZE)
    member = cls.frombuf(header, tarfile.encoding, tarfile.errors)
    # The header that was just read started one block before the
    # current position.
    member.offset = stream.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001273
Guido van Rossumd8faa362007-04-27 19:54:29 +00001274 #--------------------------------------------------------------------------
1275 # The following are methods that are called depending on the type of a
1276 # member. The entry point is _proc_member() which can be overridden in a
1277 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1278 # implement the following
1279 # operations:
1280 # 1. Set self.offset_data to the position where the data blocks begin,
1281 # if there is data that follows.
1282 # 2. Set tarfile.offset to the position where the next member's header will
1283 # begin.
1284 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Dispatch to the _proc_*() method that handles this member's
       type and return its result.
    """
    member_type = self.type

    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if member_type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if member_type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)

    # Everything else is handled as a builtin type.
    return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001297
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
       will be treated as a regular file.

       Sets self.offset_data and tarfile.offset, applies the pax
       headers stored on tarfile and returns self.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        tarfile.offset = data_start + self._block(self.size)
    else:
        tarfile.offset = data_start

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1314
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.

       Returns the TarInfo object of the following header with its
       name or linkname replaced by the long value. Raises
       SubsequentHeaderError if that header cannot be read.
    """
    raw = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information. It starts where this header starts.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(raw, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(raw, tarfile.encoding, tarfile.errors)

    return member
1336
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers.

       Builds self.sparse from the collected sparse structures, sets
       self.offset_data and tarfile.offset, replaces self.size with
       the original file size and returns self.
    """
    # We already collected some sparse structures in frombuf().
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Collect sparse structures from extended header blocks. Each block
    # holds up to 21 structures of 24 bytes each.
    while isextended:
        block = tarfile.fileobj.read(BLOCKSIZE)
        for pos in range(0, 21 * 24, 24):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            structs.append((offset, numbytes))
        isextended = bool(block[504])

    # Transform the sparse structures to something we can use
    # in ExFileObject.
    sections = _ringbuffer()
    self.sparse = sections
    lastpos = 0
    realpos = 0
    for offset, numbytes in structs:
        if offset > lastpos:
            # The gap before this data section is a hole.
            sections.append(_hole(lastpos, offset - lastpos))
        sections.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
    if lastpos < origsize:
        sections.append(_hole(lastpos, origsize - lastpos))

    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start
    # self.size still holds the stored size at this point.
    tarfile.offset = data_start + self._block(self.size)
    self.size = origsize

    return self
1377
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2008.

       Reads the pax records that follow this header, stores them in
       a pax header dictionary and returns the TarInfo object of the
       next header. Raises SubsequentHeaderError if the next header
       cannot be read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        # Global records are written into tarfile.pax_headers itself.
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Check if the pax header contains a hdrcharset field. This tells us
    # the encoding of the path, linkpath, uname and gname fields. Normally,
    # these fields are UTF-8 encoded but since POSIX.1-2008 tar
    # implementations are allowed to store them as raw binary strings if
    # the translation to UTF-8 fails.
    match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if match is not None:
        pax_headers["hdrcharset"] = match.group(1).decode("utf8")

    # For the time being, we don't care about anything other than "BINARY".
    # The only other value that is currently allowed by the standard is
    # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
    hdrcharset = pax_headers.get("hdrcharset")
    if hdrcharset == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf8"

    # Parse pax header information. A record looks like this:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        # The value starts after the "=" and ends before the trailing
        # newline of the record.
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        # Normally, we could just use "utf8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(keyword, "utf8", "utf8",
                tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(value, encoding, tarfile.encoding,
                    tarfile.errors)
        else:
            value = self._decode_pax_field(value, "utf8", "utf8",
                    tarfile.errors)

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1466
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite attributes of this object with the values carried by a
       preceding pax extended or global header.

       Only keywords listed in PAX_FIELDS are applied. Keywords listed in
       PAX_NUMBER_FIELDS are converted with their registered converter and
       fall back to 0 when the conversion raises ValueError. A copy of
       `pax_headers' is stored as self.pax_headers. `encoding' and `errors'
       are accepted but not used here.
    """
    for field, content in pax_headers.items():
        if field in PAX_FIELDS:
            # Trailing slashes are dropped from the path before it is stored.
            if field == "path":
                content = content.rstrip("/")

            if field in PAX_NUMBER_FIELDS:
                convert = PAX_NUMBER_FIELDS[field]
                try:
                    content = convert(content)
                except ValueError:
                    # Unparsable numbers are replaced by a neutral value.
                    content = 0

            setattr(self, field, content)

    # Keep a private copy so later changes to the caller's dict do not
    # leak into this object.
    self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001487
def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
    """Decode the bytes of a single pax record field.

       `value' is first decoded strictly with `encoding'. If that raises
       UnicodeDecodeError, it is decoded again with `fallback_encoding'
       using the `fallback_errors' error handler.
    """
    try:
        text = value.decode(encoding, "strict")
    except UnicodeDecodeError:
        text = value.decode(fallback_encoding, fallback_errors)
    return text
1495
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    whole, leftover = divmod(count, BLOCKSIZE)
    # A partially filled block still occupies a full block.
    if leftover:
        return (whole + 1) * BLOCKSIZE
    return whole * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001504
def isreg(self):
    """Return True if self.type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES
def isfile(self):
    """Return the result of isreg(); provided as an alternative name."""
    return self.isreg()
def isdir(self):
    """Return True if self.type equals DIRTYPE."""
    return self.type == DIRTYPE
def issym(self):
    """Return True if self.type equals SYMTYPE."""
    return self.type == SYMTYPE
def islnk(self):
    """Return True if self.type equals LNKTYPE."""
    return self.type == LNKTYPE
def ischr(self):
    """Return True if self.type equals CHRTYPE."""
    return self.type == CHRTYPE
def isblk(self):
    """Return True if self.type equals BLKTYPE."""
    return self.type == BLKTYPE
def isfifo(self):
    """Return True if self.type equals FIFOTYPE."""
    return self.type == FIFOTYPE
def issparse(self):
    """Return True if self.type equals GNUTYPE_SPARSE."""
    return self.type == GNUTYPE_SPARSE
def isdev(self):
    """Return True if self.type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1525# class TarInfo
1526
1527class TarFile(object):
1528 """The TarFile Class provides an interface to tar archives.
1529 """
1530
# Class-level defaults. __init__() overrides each of these on the instance
# when the corresponding argument is not None (`errors' is always
# overridden by the constructor argument).

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The default ExFileObject class to use.
1552
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining arguments override the class-level defaults of the
       same name when they are not None; `errors' is always stored.
       `pax_headers' is only kept when the format is PAX_FORMAT.

       Raises ValueError for any `mode' other than 'r', 'a' or 'w'
       (including the empty string) and ReadError when an archive opened
       for appending contains an invalid header.
    """
    # Validate against the mode table itself. A substring test such as
    # `mode in "raw"' would accept the empty string and then fail with a
    # KeyError on the lookup below.
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        # The caller owns this file object; close() must leave it open.
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except BaseException:
        # Do not leak a file we opened ourselves when setup fails.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001648
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001649 #--------------------------------------------------------------------------
1650 # Below are the classmethods which act as alternate constructors to the
1651 # TarFile class. The open() method is the only one that is needed for
1652 # public use; it is the "super"-constructor and is able to select an
1653 # adequate "sub"-constructor for a particular compression using the mapping
1654 # from OPEN_METH.
1655 #
1656 # This concept allows one to subclass TarFile without losing the comfort of
1657 # the super-constructor. A sub-constructor is registered and made available
1658 # by adding it to the mapping in OPEN_METH.
1659
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError when neither `name' nor `fileobj' is given or
       when `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError when no registered opener accepts
       the file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the data from the start.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against the exact modes; a substring test would let
        # "rw" through and create the stream before failing.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except BaseException:
            # The constructor treats the stream as external and will not
            # close it, so release it here before propagating.
            stream.close()
            raise
        t._extfileobj = False
        return t

    # Exact comparison: `mode in "aw"' is also true for the empty string.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001732
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else (including
       the empty string) raises ValueError. All other arguments are
       passed on to the class constructor, whose result is returned.
    """
    # Exact comparison: a substring test against "raw" accepts "".
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001740
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       When `fileobj' is None, the file `name' is opened here in binary
       mode and wrapped in a gzip.GzipFile. Raises ValueError for a mode
       other than 'r' or 'w', CompressionError when the gzip module is
       not usable and ReadError when the data cannot be read as gzip.
    """
    # Exact comparison: a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        # Do not leak the raw file we opened ourselves.
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except BaseException:
        if opened_here:
            fileobj.close()
        raise
    # NOTE(review): GzipFile.close() is documented not to close a file
    # object that was passed in, so a raw file opened above appears to
    # stay open after a successful TarFile.close() -- confirm and fix
    # separately.
    t._extfileobj = False
    return t
1766
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       When `fileobj' is given it is wrapped in a _BZ2Proxy, otherwise a
       bz2.BZ2File is opened on `name'. Raises ValueError for a mode
       other than 'r' or 'w', CompressionError when the bz2 module
       cannot be imported and ReadError when the data cannot be read as
       bzip2.
    """
    # Exact comparison: a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        # Do not leak the BZ2File we opened ourselves.
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except BaseException:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1791
# All *open() methods are registered here.
# Maps a compression type to the name of the classmethod that opens it;
# open() resolves the name with getattr() on the class.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1798
1799 #--------------------------------------------------------------------------
1800 # The public methods which TarFile provides:
1801
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed only when it was opened by this
       object, and it is closed even if writing the trailer fails.
    """
    if self.closed:
        return

    # Mark as closed first so that a failed trailer write cannot lead to
    # a second attempt on a half-finished archive.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1821
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised when no such member exists. The lookup is
       delegated to _getmember(); if a member occurs more than once in
       the archive, its last occurrence is assumed to be the most
       up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001832
def getmembers(self):
    """Return the list of TarInfo objects for all members, in archive
       order. The archive is scanned completely first if that has not
       happened yet.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete listing requires reading the whole archive once.
    self._load()
    return self.members
1842
def getnames(self):
    """Return the names of all members as a list, in the same order as
       the list returned by getmembers().
    """
    members = self.getmembers()
    return [member.name for member in members]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001848
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing either the file `name' or the
       open file object `fileobj' (via os.fstat on its descriptor). The
       result may be modified before it is handed to addfile(). `arcname'
       optionally gives the name to store in the archive. None is
       returned for file types that cannot be represented.
    """
    self._check("aw")

    # A file object dictates the name.
    if fileobj is not None:
        name = fileobj.name

    # Derive the archive member name: strip a drive prefix, use
    # forward slashes only and make the path relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Create the object that receives the file's metadata.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Pick fstat/lstat/stat depending on the input, the platform and
    # whether symlinks shall be followed.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    linkname = ""
    mode_bits = statres.st_mode

    if stat.S_ISREG(mode_bits):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference
                            and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # Hardlink to a file that is in the archive already.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # Remember the inode only if it is valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(mode_bits):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(mode_bits):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(mode_bits):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(mode_bits):
        member_type = CHRTYPE
    elif stat.S_ISBLK(mode_bits):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything we learned into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = mode_bits
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname

    # Symbolic owner names are optional; ids without an entry are skipped.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (member_type in (CHRTYPE, BLKTYPE)
            and hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1946
def list(self, verbose=True):
    """Write a table of contents to sys.stdout, one member per line.
       With `verbose' set to False only the member names are printed;
       otherwise an `ls -l'-like line is produced.
    """
    self._check()

    for member in self:
        if verbose:
            print(filemode(member.mode), end=' ')

            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            print(owner, end=' ')

            # Device nodes show "major,minor" in place of a size.
            if member.ischr() or member.isblk():
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')

            stamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        entry = member.name
        if member.isdir():
            entry += "/"
        print(entry, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001975
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.

       Passing `exclude' emits a DeprecationWarning. The archive file
       itself and unsupported file types are skipped silently (a debug
       message is written).
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the source file even when writing fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
2037
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive. When
       `fileobj' is given, tarinfo.size bytes are copied from it after
       the header and padded to a full block. TarInfo objects can be
       created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so the caller's object is not the one stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            # Pad the last, partially filled block with NULs.
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002063
def extractall(self, path=".", members=None):
    """Extract every member of the archive below `path' (the current
       working directory by default). Directories are created with a
       restrictive mode first; their owner, modification time and
       permissions are applied once everything has been extracted.
       `members' is optional and must be a subset of the list returned
       by getmembers().
    """
    # NOTE(review): member names are joined to `path' as they are; no
    # check against names that point outside of `path' is done here.
    postponed = []

    for member in (self if members is None else members):
        if member.isdir():
            # Extract directories with a safe mode.
            postponed.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        self.extract(member, path)

    # Walk the directories in reverse name order, so that nested
    # directories are handled before their parents.
    for dir_info in reversed(sorted(postponed, key=lambda d: d.name)):
        dirpath = os.path.join(path, dir_info.name)
        try:
            # Set correct owner, mtime and filemode on directories.
            self.chown(dir_info, dirpath)
            self.utime(dir_info, dirpath)
            self.chmod(dir_info, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2100
def extract(self, member, path=""):
    """Extract a single member below `path' (the current working
       directory by default) using its full name, reproducing its file
       information as accurately as possible. `member' may be a filename
       or a TarInfo object.

       EnvironmentError is re-raised when errorlevel > 0 and
       ExtractError when errorlevel > 1; otherwise the error is only
       written as a debug message.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination)
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            message = "tarfile: %s" % e.strerror
        else:
            message = "tarfile: %s %r" % (e.strerror, e.filename)
        self._dbg(1, message)
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2133
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may be
       a filename or a TarInfo object. Regular files and members of an
       unknown type yield a file-like object; for a link, the file
       object of the link's target is returned. For everything else,
       None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Regular files, and members whose type is unknown (these are
    # treated as regular files), are served directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
2172
def _extract_member(self, tarinfo, targetpath):
    """Create the file system object described by tarinfo at targetpath.

    The path is normalised, missing parent directories are created, the
    matching make*() method builds the object, and finally owner, mode
    (not for symbolic links) and modification time are applied.
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not part of the archive are created
    # with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        label = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        label = tarinfo.name
    self._dbg(1, label)

    # Dispatch on the member type; the order of the checks matters.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    # Restore metadata; a symbolic link's mode is left alone.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
    self.utime(tarinfo, targetpath)
2214
2215 #--------------------------------------------------------------------------
2216 # Below are the different file methods. They are called via
2217 # _extract_member() when extract() is called. They can be replaced in a
2218 # subclass to implement other functionality.
2219
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath; an existing one is accepted.

    The directory is created with the restrictive mode 0o700; the mode
    recorded in the archive is applied afterwards by _extract_member().
    Any error other than EEXIST is propagated.
    """
    try:
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2230
def makefile(self, tarinfo, targetpath):
    """Write the data of the member tarinfo to the file targetpath.

    The member's data is obtained through self.extractfile() and copied
    into a file opened with the builtin open() in "wb" mode.  Both file
    objects are closed even when opening the target or copying fails,
    so an aborted extraction does not leak open files.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2239
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type as if it were a regular file.

    The data is written with makefile(); afterwards a level 1 debug
    message names the unknown type.
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2247
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath.

    Raises ExtractError when the os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002255
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at targetpath.

    The node's mode is tarinfo.mode combined with S_IFBLK for block
    devices and S_IFCHR otherwise; its device number is built from
    tarinfo.devmajor and tarinfo.devminor.  Raises ExtractError when the
    os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2270
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at targetpath.

    Symbolic links are created with os.symlink(), hard links with
    os.link() pointing at tarinfo._link_target (set by extract()).

    If that raises AttributeError (no link support in the os module, or
    _link_target was never set), a copy of the referenced file is made
    instead: first by extracting the referenced archive member to
    targetpath, then, if that fails, by copying the file from the file
    system.  IOError is raised when the copy fails as well.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # A symlink target is relative to the link's directory.  Join
            # only the non-empty parts: for a top-level member the
            # directory is "" and a plain '"" + "/" + linkname' would
            # produce an absolute path.
            linkpath = "/".join(
                filter(None, (os.path.dirname(tarinfo.name),
                              tarinfo.linkname)))
        else:
            linkpath = tarinfo.linkname

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Not available from the archive; fall back to the file
            # system.
            linkpath = linkpath.replace("/", os.sep)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError as e:
                raise IOError("link could not be created") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002297
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo.

    Nothing happens unless the pwd module is available and the
    effective user id is 0.  Group and user are looked up by name
    first, then by numeric id, and finally default to the ids of the
    current process.  Raises ExtractError when changing the owner
    fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        group = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            group = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            group = os.getgid()

    try:
        user = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            user = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            user = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself rather than what it points to.
            os.lchown(targetpath, user, group)
        elif sys.platform != "os2emx":
            os.chown(targetpath, user, group)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002325
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits tarinfo.mode to targetpath.

    Does nothing when the os module has no chmod(); raises ExtractError
    when changing the mode fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002334
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

    Does nothing when the os module has no utime(); raises ExtractError
    when the times cannot be changed.
    """
    if hasattr(os, 'utime'):
        stamp = tarinfo.mtime
        try:
            os.utime(targetpath, (stamp, stamp))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002344
2345 #--------------------------------------------------------------------------
def next(self):
    """Return the next archive member as a TarInfo object.

    Requires mode "r" or "a".  A member stored in self.firstmember is
    handed out first.  Otherwise the next header is read at self.offset
    and the resulting TarInfo is appended to self.members.  When no
    further member is available, self._loaded is set and None is
    returned.

    With ignore_zeros set, end-of-file marker blocks and invalid
    headers are skipped block by block.  ReadError is raised for an
    invalid, empty or truncated header at offset 0 and for any
    SubsequentHeaderError.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        # Either a header was read or the archive ends here.
        break

    if tarinfo is None:
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002391
2392 #--------------------------------------------------------------------------
2393 # Little helper methods:
2394
def _getmember(self, name, tarinfo=None):
    """Return the last member called name, or None if there is none.

    The member list is searched from bottom to top.  When tarinfo is
    given, only members located before it in the list are considered.
    """
    # getmembers() ensures that all members have been loaded.
    members = self.getmembers()

    end = len(members) if tarinfo is None else members.index(tarinfo)

    for candidate in reversed(members[:end]):
        if name == candidate.name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002410
def _load(self):
    """Read the archive to its end and mark it as completely loaded.

    next() is called until it returns None; afterwards self._loaded is
    set.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2420
def _check(self, mode=None):
    """Verify that the TarFile is open and allows the operation.

    mode -- string of acceptable modes, or None to skip the mode check.

    Raises IOError when the TarFile is closed or when self.mode does
    not occur in mode.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002429
def __iter__(self):
    """Return an iterator over the archive's members.

    Once every member has been loaded the cached member list is
    iterated directly; before that a TarIter is used.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2437
def _dbg(self, level, msg):
    """Print msg to sys.stderr if level does not exceed self.debug."""
    if self.debug >= level:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002443
def __enter__(self):
    """Enter a with block: verify the TarFile is open and return it."""
    self._check()
    return self
2447
def __exit__(self, type, value, traceback):
    """Leave a with block.

    Without an exception the TarFile is closed normally.  After an
    exception only the underlying file object is closed (unless it was
    supplied from outside) and the TarFile is marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002457# class TarFile
2458
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...

    The iterator always starts at the first member, including members
    that were read before the iterator was created.
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the TarFile tarfile."""
        self.tarfile = tarfile
        # Position of the next member within tarfile.members.
        self.index = 0

    def __iter__(self):
        """Return the iterator object itself."""
        return self

    def __next__(self):
        """Return the next member; raise StopIteration at the end.

        Members already cached in tarfile.members are served by index.
        Only when the cache is exhausted is TarFile.next() asked to
        read further.  This keeps the iteration complete when next() or
        getmembers() was called before or during the iteration
        (SF #1100429).
        """
        tarfile = self.tarfile
        if self.index == 0 and tarfile.firstmember is not None:
            # The first member is still pending: fetch it through next()
            # so that firstmember is consumed and not returned a second
            # time later on.
            tarinfo = tarfile.next()
        elif self.index < len(tarfile.members):
            tarinfo = tarfile.members[self.index]
        elif not tarfile._loaded:
            tarinfo = tarfile.next()
            if not tarinfo:
                tarfile._loaded = True
                raise StopIteration
        else:
            raise StopIteration
        self.index += 1
        return tarinfo
2494
2495# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a span of a sparse file that
    starts at `offset' and is `size' long.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True when offset falls into [self.offset, self.offset + size).
        start = self.offset
        return start <= offset < start + self.size
2504
class _data(_section):
    """A section of a sparse file that holds data; `realpos' is stored
    alongside the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2511
class _hole(_section):
    """A section of a sparse file that is a hole; it adds nothing to
    _section.
    """
2516
class _ringbuffer(list):
    """List of sections that is searched like a ring.

    The index of the last match is remembered in self.idx and the next
    search starts there, so consecutive lookups of neighbouring offsets
    do not rescan the list from its beginning.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item containing offset, or None.

        The search starts at the previously matched item and wraps
        around at the end of the list.  None is returned when the
        buffer is empty or when one complete round found no match.
        """
        if not self:
            # Nothing to search; indexing an empty list would raise
            # IndexError.
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2537
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002538#--------------------
2539# exported functions
2540#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again right away; a TarError
       raised while doing so means False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2551
# Save the current open() as bltn_open before the name is rebound below
# (presumably this is still the builtin open() at this point -- confirm
# against the rest of the module).  makefile() uses bltn_open() to create
# the extracted file.
bltn_open = open
# From here on the module-level name open refers to TarFile.open; this is
# the open() that is_tarfile() calls.
open = TarFile.open