blob: 16d338c7384485a24b012df4e77b3ddfd51b5ebb [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Senthil Kumaran7c9719c2011-07-28 22:32:49 +080034__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
Guido van Rossum98297ee2007-11-06 21:34:58 +000036__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
41import sys
42import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020043import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000044import shutil
45import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000046import time
47import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000048import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000049import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000050
51try:
52 import grp, pwd
53except ImportError:
54 grp = pwd = None
55
Brian Curtin16633fa2010-07-09 13:54:27 +000056# os.symlink on Windows prior to 6.0 raises NotImplementedError
# Exceptions that signal that a symbolic link could not be created.
try:
    # WindowsError (1314) will be raised if the caller does not hold the
    # SeCreateSymbolicLinkPrivilege privilege. The name WindowsError is not
    # defined everywhere, hence the NameError fallback below.
    symlink_exception = (AttributeError, NotImplementedError, WindowsError)
except NameError:
    symlink_exception = (AttributeError, NotImplementedError)
64
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000065# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
Georg Brandl1a3284e2007-12-02 09:40:06 +000068from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000069
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000070#---------------------------------------------------------
71# tar constants
72#---------------------------------------------------------
Lars Gustäbelb506dc32007-08-07 18:36:16 +000073NUL = b"\0" # the null character
Guido van Rossumd8faa362007-04-27 19:54:29 +000074BLOCKSIZE = 512 # length of processing blocks
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000075RECORDSIZE = BLOCKSIZE * 20 # length of records
Lars Gustäbelb506dc32007-08-07 18:36:16 +000076GNU_MAGIC = b"ustar \0" # magic gnu tar string
77POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000078
Guido van Rossumd8faa362007-04-27 19:54:29 +000079LENGTH_NAME = 100 # maximum length of a filename
80LENGTH_LINK = 100 # maximum length of a linkname
81LENGTH_PREFIX = 155 # maximum length of the prefix field
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000082
Lars Gustäbelb506dc32007-08-07 18:36:16 +000083REGTYPE = b"0" # regular file
84AREGTYPE = b"\0" # regular file
85LNKTYPE = b"1" # link (inside tarfile)
86SYMTYPE = b"2" # symbolic link
87CHRTYPE = b"3" # character special device
88BLKTYPE = b"4" # block special device
89DIRTYPE = b"5" # directory
90FIFOTYPE = b"6" # fifo special device
91CONTTYPE = b"7" # contiguous file
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000092
Lars Gustäbelb506dc32007-08-07 18:36:16 +000093GNUTYPE_LONGNAME = b"L" # GNU tar longname
94GNUTYPE_LONGLINK = b"K" # GNU tar longlink
95GNUTYPE_SPARSE = b"S" # GNU tar sparse file
Guido van Rossumd8faa362007-04-27 19:54:29 +000096
Lars Gustäbelb506dc32007-08-07 18:36:16 +000097XHDTYPE = b"x" # POSIX.1-2001 extended header
98XGLTYPE = b"g" # POSIX.1-2001 global header
99SOLARIS_XHDTYPE = b"X" # Solaris extended header
Guido van Rossumd8faa362007-04-27 19:54:29 +0000100
101USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
102GNU_FORMAT = 1 # GNU tar format
103PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
104DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000105
106#---------------------------------------------------------
107# tarfile constants
108#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000109# File types that tarfile supports:
110SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
111 SYMTYPE, DIRTYPE, FIFOTYPE,
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000112 CONTTYPE, CHRTYPE, BLKTYPE,
113 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
114 GNUTYPE_SPARSE)
115
Guido van Rossumd8faa362007-04-27 19:54:29 +0000116# File types that will be treated as a regular file.
117REGULAR_TYPES = (REGTYPE, AREGTYPE,
118 CONTTYPE, GNUTYPE_SPARSE)
119
120# File types that are part of the GNU tar format.
121GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
122 GNUTYPE_SPARSE)
123
124# Fields from a pax header that override a TarInfo attribute.
125PAX_FIELDS = ("path", "linkpath", "size", "mtime",
126 "uid", "gid", "uname", "gname")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000127
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000128# Fields from a pax header that are affected by hdrcharset.
129PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
130
Guido van Rossume7ba4952007-06-06 23:52:48 +0000131# Fields in a pax header that are numbers, all other fields
132# are treated as strings.
133PAX_NUMBER_FIELDS = {
134 "atime": float,
135 "ctime": float,
136 "mtime": float,
137 "uid": int,
138 "gid": int,
139 "size": int
140}
141
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000142#---------------------------------------------------------
143# Bits used in the mode field, values in octal.
144#---------------------------------------------------------
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000145S_IFLNK = 0o120000 # symbolic link
146S_IFREG = 0o100000 # regular file
147S_IFBLK = 0o060000 # block device
148S_IFDIR = 0o040000 # directory
149S_IFCHR = 0o020000 # character device
150S_IFIFO = 0o010000 # fifo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000151
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000152TSUID = 0o4000 # set UID on execution
153TSGID = 0o2000 # set GID on execution
154TSVTX = 0o1000 # reserved
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000156TUREAD = 0o400 # read by owner
157TUWRITE = 0o200 # write by owner
158TUEXEC = 0o100 # execute/search by owner
159TGREAD = 0o040 # read by group
160TGWRITE = 0o020 # write by group
161TGEXEC = 0o010 # execute/search by group
162TOREAD = 0o004 # read by other
163TOWRITE = 0o002 # write by other
164TOEXEC = 0o001 # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000165
166#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000167# initialization
168#---------------------------------------------------------
# Default text encoding: UTF-8 when os.name is "nt" or "ce", otherwise
# whatever the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000173
174#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000175# Some useful functions
176#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000177
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    s is encoded with the given encoding and error handler. The result
    is cut off after length bytes; if it is shorter, it is filled up
    with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long values get no padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000183
def nts(s, encoding, errors):
    """Decode a bytes object to a string, stopping at the first null byte.

    Everything from the first b"\\0" onwards is discarded; if there is
    no null byte, the whole of s is decoded.
    """
    end = s.find(b"\0")
    raw = s if end == -1 else s[:end]
    return raw.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191
def nti(s):
    """Convert a number field to a python number.

    Two encodings are understood (see itn()): a string of octal digits,
    or a leading 0o200 / 0o377 byte followed by a big-endian base-256
    number, where 0o377 marks a negative value.

    Raises InvalidHeaderError if an octal field cannot be parsed.
    """
    marker = s[0]
    if marker not in (0o200, 0o377):
        # Plain octal digits; an empty field counts as zero.
        try:
            return int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: every byte after the marker belongs to the number.
    magnitude = int.from_bytes(s[1:], "big")
    if marker == 0o377:
        return -(256 ** (len(s) - 1) - magnitude)
    return magnitude
210
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

    POSIX 1003.1-1988 requires numbers to be encoded as a string of
    octal digits followed by a null-byte, which allows values up to
    (8**(digits-1))-1. GNU tar can store other values: a leading 0o200
    (positive) or 0o377 (negative) byte is followed by digits-1 bytes
    holding a big-endian base-256 representation, which allows values
    up to (256**(digits-1))-1.

    The octal form is returned as bytes, the base-256 form as a
    bytearray. Raises ValueError if n fits neither encoding allowed by
    format.
    """
    if 0 <= n < 8 ** (digits - 1):
        return bytes("%0*o" % (digits - 1, n), "ascii") + NUL

    limit = 256 ** (digits - 1)
    if format != GNU_FORMAT or not (-limit <= n < limit):
        raise ValueError("overflow in number field")

    if n >= 0:
        marker = 0o200
    else:
        marker = 0o377
        # Shift into the non-negative range so the low bytes can be
        # peeled off below.
        n = 256 ** digits + n

    # Emit the lowest byte first and keep prepending, which leaves the
    # payload in big-endian order.
    payload = bytearray()
    for _ in range(digits - 1):
        payload.insert(0, n & 0o377)
        n >>= 8
    return bytearray([marker]) + payload
238
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the header are summed up, except for the 8-byte chksum
    field at offset 148, which is treated as if it was filled with
    spaces (8 * 0x20 == 256). According to the GNU tar sources, some
    tars (Sun and NeXT) calculate chksum with signed char, which gives a
    different result if bytes with the high bit set are present, so
    both variants are calculated.
    """
    unsigned_fields = struct.unpack_from("148B8x356B", buf)
    signed_fields = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(unsigned_fields), 256 + sum(signed_fields)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000251
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire content is copied; if it is 0,
    nothing is done. Raises IOError if src ends before length bytes
    could be read.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    def _transfer(count):
        # Move exactly count bytes; a short read means src is exhausted.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)
    for _ in range(full_chunks):
        _transfer(BUFSIZE)
    if tail != 0:
        _transfer(tail)
    return
276
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    from warnings import warn

    # stacklevel=2 attributes the warning to the caller of filemode().
    warn("deprecated in favor of stat.filemode", DeprecationWarning,
         stacklevel=2)
    return stat.filemode(mode)
283
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000284
# Exception hierarchy: everything derives from TarError, and all
# header-related problems derive from HeaderError.
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000318
319#---------------------------
320# internal stream interface
321#---------------------------
322class _LowLevelFile:
323 """Low-level file object. Supports reading and writing.
324 It is used instead of a regular file object for streaming
325 access.
326 """
327
328 def __init__(self, name, mode):
329 mode = {
330 "r": os.O_RDONLY,
331 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
332 }[mode]
333 if hasattr(os, "O_BINARY"):
334 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000335 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000336
337 def close(self):
338 os.close(self.fd)
339
340 def read(self, size):
341 return os.read(self.fd, size)
342
343 def write(self, s):
344 os.write(self.fd, s)
345
346class _Stream:
347 """Class that serves as an adapter between TarFile and
348 a stream-like object. The stream-like object only
349 needs to have a read() or write() method and is accessed
350 blockwise. Use of gzip or bzip2 compression is possible.
351 A stream-like object could be for example: sys.stdin,
352 sys.stdout, a socket, a tape device etc.
353
354 _Stream is intended to be used only internally.
355 """
356
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000357 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358 """Construct a _Stream object.
359 """
360 self._extfileobj = True
361 if fileobj is None:
362 fileobj = _LowLevelFile(name, mode)
363 self._extfileobj = False
364
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000365 if comptype == '*':
366 # Enable transparent compression detection for the
367 # stream interface
368 fileobj = _StreamProxy(fileobj)
369 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000370
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000371 self.name = name or ""
372 self.mode = mode
373 self.comptype = comptype
374 self.fileobj = fileobj
375 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000376 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000377 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000378 self.closed = False
379
Antoine Pitrou605c2932010-09-23 20:15:14 +0000380 try:
381 if comptype == "gz":
382 try:
383 import zlib
384 except ImportError:
385 raise CompressionError("zlib module is not available")
386 self.zlib = zlib
387 self.crc = zlib.crc32(b"")
388 if mode == "r":
389 self._init_read_gz()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100390 self.exception = zlib.error
Antoine Pitrou605c2932010-09-23 20:15:14 +0000391 else:
392 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000393
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100394 elif comptype == "bz2":
Antoine Pitrou605c2932010-09-23 20:15:14 +0000395 try:
396 import bz2
397 except ImportError:
398 raise CompressionError("bz2 module is not available")
399 if mode == "r":
400 self.dbuf = b""
401 self.cmp = bz2.BZ2Decompressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100402 self.exception = IOError
Antoine Pitrou605c2932010-09-23 20:15:14 +0000403 else:
404 self.cmp = bz2.BZ2Compressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100405
406 elif comptype == "xz":
407 try:
408 import lzma
409 except ImportError:
410 raise CompressionError("lzma module is not available")
411 if mode == "r":
412 self.dbuf = b""
413 self.cmp = lzma.LZMADecompressor()
414 self.exception = lzma.LZMAError
415 else:
416 self.cmp = lzma.LZMACompressor()
417
418 elif comptype != "tar":
419 raise CompressionError("unknown compression type %r" % comptype)
420
Antoine Pitrou605c2932010-09-23 20:15:14 +0000421 except:
422 if not self._extfileobj:
423 self.fileobj.close()
424 self.closed = True
425 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000428 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000429 self.close()
430
431 def _init_write_gz(self):
432 """Initialize for writing with gzip compression.
433 """
434 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
435 -self.zlib.MAX_WBITS,
436 self.zlib.DEF_MEM_LEVEL,
437 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000438 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000439 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000440 if self.name.endswith(".gz"):
441 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000442 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
443 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000444
445 def write(self, s):
446 """Write string s to the stream.
447 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000448 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000449 self.crc = self.zlib.crc32(s, self.crc)
450 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000451 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000452 s = self.cmp.compress(s)
453 self.__write(s)
454
455 def __write(self, s):
456 """Write string s to the stream if a whole new block
457 is ready to be written.
458 """
459 self.buf += s
460 while len(self.buf) > self.bufsize:
461 self.fileobj.write(self.buf[:self.bufsize])
462 self.buf = self.buf[self.bufsize:]
463
464 def close(self):
465 """Close the _Stream object. No operation should be
466 done on it afterwards.
467 """
468 if self.closed:
469 return
470
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000471 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000472 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000473
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000474 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000475 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000476 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000477 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000478 # The native zlib crc is an unsigned 32-bit integer, but
479 # the Python wrapper implicitly casts that to a signed C
480 # long. So, on a 32-bit box self.crc may "look negative",
481 # while the same crc on a 64-bit box may "look positive".
482 # To avoid irksome warnings from the `struct` module, force
483 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000484 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
485 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000486
487 if not self._extfileobj:
488 self.fileobj.close()
489
490 self.closed = True
491
492 def _init_read_gz(self):
493 """Initialize for reading a gzip compressed fileobj.
494 """
495 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000496 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000497
498 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000499 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000500 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000501 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000502 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000503
504 flag = ord(self.__read(1))
505 self.__read(6)
506
507 if flag & 4:
508 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
509 self.read(xlen)
510 if flag & 8:
511 while True:
512 s = self.__read(1)
513 if not s or s == NUL:
514 break
515 if flag & 16:
516 while True:
517 s = self.__read(1)
518 if not s or s == NUL:
519 break
520 if flag & 2:
521 self.__read(2)
522
523 def tell(self):
524 """Return the stream's file pointer position.
525 """
526 return self.pos
527
528 def seek(self, pos=0):
529 """Set the stream's file pointer to pos. Negative seeking
530 is forbidden.
531 """
532 if pos - self.pos >= 0:
533 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000534 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000535 self.read(self.bufsize)
536 self.read(remainder)
537 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000538 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000539 return self.pos
540
541 def read(self, size=None):
542 """Return the next size number of bytes from the stream.
543 If size is not defined, return all bytes of the stream
544 up to EOF.
545 """
546 if size is None:
547 t = []
548 while True:
549 buf = self._read(self.bufsize)
550 if not buf:
551 break
552 t.append(buf)
553 buf = "".join(t)
554 else:
555 buf = self._read(size)
556 self.pos += len(buf)
557 return buf
558
559 def _read(self, size):
560 """Return size bytes from the stream.
561 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000562 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000563 return self.__read(size)
564
565 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000566 while c < size:
567 buf = self.__read(self.bufsize)
568 if not buf:
569 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000570 try:
571 buf = self.cmp.decompress(buf)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100572 except self.exception:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000573 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000574 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000575 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000576 buf = self.dbuf[:size]
577 self.dbuf = self.dbuf[size:]
578 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000579
580 def __read(self, size):
581 """Return size bytes from stream. If internal buffer is empty,
582 read another block from the stream.
583 """
584 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000585 while c < size:
586 buf = self.fileobj.read(self.bufsize)
587 if not buf:
588 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000589 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000590 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000591 buf = self.buf[:size]
592 self.buf = self.buf[size:]
593 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000594# class _Stream
595
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000596class _StreamProxy(object):
597 """Small proxy class that enables transparent compression
598 detection for the Stream interface (mode 'r|*').
599 """
600
601 def __init__(self, fileobj):
602 self.fileobj = fileobj
603 self.buf = self.fileobj.read(BLOCKSIZE)
604
605 def read(self, size):
606 self.read = self.fileobj.read
607 return self.buf
608
609 def getcomptype(self):
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100610 if self.buf.startswith(b"\x1f\x8b\x08"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000611 return "gz"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100612 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000613 return "bz2"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100614 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
615 return "xz"
616 else:
617 return "tar"
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000618
619 def close(self):
620 self.fileobj.close()
621# class StreamProxy
622
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000623#------------------------
624# Extraction file object
625#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000626class _FileInFile(object):
627 """A thin wrapper around an existing file object that
628 provides a part of its data as an individual file
629 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000630 """
631
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000632 def __init__(self, fileobj, offset, size, blockinfo=None):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000633 self.fileobj = fileobj
634 self.offset = offset
635 self.size = size
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000636 self.position = 0
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200637 self.name = getattr(fileobj, "name", None)
638 self.closed = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000639
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000640 if blockinfo is None:
641 blockinfo = [(0, size)]
642
643 # Construct a map with data and zero blocks.
644 self.map_index = 0
645 self.map = []
646 lastpos = 0
647 realpos = self.offset
648 for offset, size in blockinfo:
649 if offset > lastpos:
650 self.map.append((False, lastpos, offset, None))
651 self.map.append((True, offset, offset + size, realpos))
652 realpos += size
653 lastpos = offset + size
654 if lastpos < self.size:
655 self.map.append((False, lastpos, self.size, None))
656
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200657 def flush(self):
658 pass
659
660 def readable(self):
661 return True
662
663 def writable(self):
664 return False
665
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000666 def seekable(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000667 return self.fileobj.seekable()
668
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000669 def tell(self):
670 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000671 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000672 return self.position
673
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200674 def seek(self, position, whence=io.SEEK_SET):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000675 """Seek to a position in the file.
676 """
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200677 if whence == io.SEEK_SET:
678 self.position = min(max(position, 0), self.size)
679 elif whence == io.SEEK_CUR:
680 if position < 0:
681 self.position = max(self.position + position, 0)
682 else:
683 self.position = min(self.position + position, self.size)
684 elif whence == io.SEEK_END:
685 self.position = max(min(self.size + position, self.size), 0)
686 else:
687 raise ValueError("Invalid argument")
688 return self.position
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000689
690 def read(self, size=None):
691 """Read data from the file.
692 """
693 if size is None:
694 size = self.size - self.position
695 else:
696 size = min(size, self.size - self.position)
697
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000698 buf = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000699 while size > 0:
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000700 while True:
701 data, start, stop, offset = self.map[self.map_index]
702 if start <= self.position < stop:
703 break
704 else:
705 self.map_index += 1
706 if self.map_index == len(self.map):
707 self.map_index = 0
708 length = min(size, stop - self.position)
709 if data:
Lars Gustäbeldd071042011-02-23 11:42:22 +0000710 self.fileobj.seek(offset + (self.position - start))
711 buf += self.fileobj.read(length)
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000712 else:
713 buf += NUL * length
714 size -= length
715 self.position += length
716 return buf
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000717
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200718 def readinto(self, b):
719 buf = self.read(len(b))
720 b[:len(buf)] = buf
721 return len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000722
723 def close(self):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000724 self.closed = True
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200725#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000726
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of one archive member.

       The raw stream is a _FileInFile built from the archive's file
       object and the member's data offset, size and sparse information.
    """

    def __init__(self, tarfile, tarinfo):
        member_data = _FileInFile(
            fileobj=tarfile.fileobj,
            offset=tarinfo.offset_data,
            size=tarinfo.size,
            blockinfo=tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000734
735#------------------
736# Exported Classes
737#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 0o7777 and directory names get a
           trailing slash appended if they do not already end in one.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
           or PAX_FORMAT); any other value raises ValueError. Note that
           errors is not forwarded to the pax header creation.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname is longer than LENGTH_LINK
           or if the name cannot be split into prefix and name parts.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

           Over-long link names and names are emitted as separate
           GNU long-link / long-name sequences in front of the header.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that the member's own pax_headers stay untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if no such
           slash exists or the remaining name part is still too long.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash itself from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field: an 8 byte placeholder of spaces. It has to be
            # exactly 8 bytes wide so that the type flag lands at offset 156
            # (where frombuf() reads it) and so that the splice below hits
            # the checksum field.
            b" " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields with NUL bytes up to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Replace the first 7 bytes of the checksum placeholder with six octal
        # digits and a NUL; the last placeholder byte remains a space.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The payload is the encoded name terminated by a NUL byte.
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for value in pax_headers.values():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = []
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records.append(b"21 hdrcharset=BINARY\n")

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # The record length includes its own decimal representation, so
            # iterate until the total length is stable.
            base = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = base + len(str(p))
                if n == p:
                    break
                p = n
            records.append(bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n")
        records = b"".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError
           or InvalidHeaderError if buf is empty, has the wrong length,
           consists only of NUL bytes or carries a bad checksum.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises SubsequentHeaderError if the header following the
           long name/link data is missing or invalid.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            member = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        member.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            member.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            member.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. frombuf() already did
        # this for the short name, but the long name assigned above replaces
        # it, so repeat the normalization to stay consistent with frombuf().
        if member.isdir():
            member.name = member.name.rstrip("/")

        return member

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        # The header's size field is used to skip the stored data; afterwards
        # size is replaced by the original size recorded in the sparse header.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.

           Raises InvalidHeaderError if a record declares a length of
           zero and SubsequentHeaderError if the header following the
           pax data is missing or invalid.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would never advance pos and make this loop
                # spin forever on a malformed archive.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            member = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(member, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(member, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(member, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            member.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = member.offset_data
                if member.isreg() or member.type not in SUPPORTED_TYPES:
                    offset += member._block(member.size)
                tarfile.offset = offset

        return member

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

           The offset/numbytes pairs are collected from the raw pax
           record data in buf and stored on next.sparse.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

           The "GNU.sparse.map" value is a comma separated list of
           numbers that is paired up into (offset, numbytes) tuples.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

           The sparse map is read from the data area of the member: a
           newline terminated count followed by that many newline
           terminated offset and size numbers.
        """
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                # The map continues in the next block.
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        # The member's real data starts after the blocks holding the map.
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    # Unparsable numbers fall back to 0.
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

           A strict decode with encoding is tried first; if that fails
           the fallback encoding and error handler are used.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member is a directory."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member is a symbolic link."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member is a hard link."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member is a character device."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member is a block device."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member is a FIFO."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if sparse information is attached to the member."""
        return self.sparse is not None
    def isdev(self):
        """Return True if the member is a character device, block device or FIFO."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1393# class TarInfo
1394
1395class TarFile(object):
1396 """The TarFile Class provides an interface to tar archives.
1397 """
1398
# Debug verbosity: may be set from 0 (no msgs) to 3 (all msgs).
debug = 0

# If true, add content of linked file to the tar file, else the link.
dereference = False

# If true, skips empty or invalid blocks and continues processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (if debug >= 0).
# If > 0, errors are passed to the caller as exceptions.
errorlevel = 1

# The format to use when creating an archive.
format = DEFAULT_FORMAT

# Encoding for 8-bit character strings.
encoding = ENCODING

# Error handler for unicode conversion.
errors = None

# The default TarInfo class to use.
tarinfo = TarInfo

# The file-object for extractfile().
fileobject = ExFileObject
Guido van Rossumd8faa362007-04-27 19:54:29 +00001420
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       Every other argument that is not None overrides the class attribute
       of the same name; `errors' is always stored. `pax_headers' is only
       kept when the resulting format is PAX_FORMAT.

       Raises ValueError for any `mode' other than 'r', 'a' or 'w'. If
       reading the first header, scanning to the end in append mode or
       writing the pax global header fails, a file object opened here is
       closed again before the exception propagates.
    """
    # Compare against whole mode strings. A substring test such as
    # `mode in "raw"' also accepts the empty string, which then failed
    # with a KeyError in the lookup below instead of a ValueError.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist as an attribute before next() runs.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Do not leak a file object that was opened by this constructor.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001516
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001517 #--------------------------------------------------------------------------
1518 # Below are the classmethods which act as alternate constructors to the
1519 # TarFile class. The open() method is the only one that is needed for
1520 # public use; it is the "super"-constructor and is able to select an
1521 # adequate "sub"-constructor for a particular compression using the mapping
1522 # from OPEN_METH.
1523 #
1524 # This concept allows one to subclass TarFile without losing the comfort of
1525 # the super-constructor. A sub-constructor is registered and made available
1526 # by adding it to the mapping in OPEN_METH.
1527
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type, and ReadError if no registered opener accepts
       the file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so that the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare whole strings: the substring test `filemode in "rw"'
        # would let the invalid mode "rw" through to _Stream.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream wrapper was created here, so the TarFile owns it.
        t._extfileobj = False
        return t

    # Whole-string comparison again, so that "" and "aw" end up in the
    # "undiscernible mode" error below instead of reaching taropen().
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001607
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       Returns cls(name, mode, fileobj, **kwargs). Raises ValueError for
       any `mode' other than 'r', 'a' or 'w'.
    """
    # Whole-string comparison: `mode in "raw"' is a substring test and
    # would also accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001615
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for any `mode' other than 'r' or 'w',
       CompressionError if the gzip module is unusable, and ReadError if
       an IOError occurs after the GzipFile object has been created.
    """
    # Whole-string comparison (as in xzopen): `mode in "rw"' is a
    # substring test and would also accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    extfileobj = fileobj is not None
    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        if not extfileobj and fileobj is not None:
            fileobj.close()
        # fileobj is still None only if the GzipFile constructor itself
        # failed for a path-based open; pass that error on unchanged.
        if fileobj is None:
            raise
        raise ReadError("not a gzip file")
    except:
        if not extfileobj and fileobj is not None:
            fileobj.close()
        raise
    t._extfileobj = extfileobj
    return t
1646
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for any `mode' other than 'r' or 'w',
       CompressionError if the bz2 module cannot be imported, and
       ReadError if taropen() fails with IOError or EOFError. The
       BZ2File created here is closed again whenever taropen() fails.
    """
    # Whole-string comparison (as in xzopen): `mode in "rw"' is a
    # substring test and would also accept the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    fileobj = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure must not leak the BZ2File either.
        fileobj.close()
        raise
    t._extfileobj = False
    return t
1670
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for any `mode' other than 'r' or 'w',
       CompressionError if the lzma module cannot be imported, and
       ReadError if taropen() fails with LZMAError or EOFError. The
       LZMAFile created here is closed again whenever taropen() fails.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (lzma.LZMAError, EOFError):
        fileobj.close()
        raise ReadError("not an lzma file")
    except:
        # Any other failure must not leak the LZMAFile either.
        fileobj.close()
        raise
    t._extfileobj = False
    return t
1693
# Registry of all *open() methods: maps a compression type name to the
# name of the classmethod that opens archives of that type.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz": "gzopen",     # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz": "xzopen",     # lzma compressed tar
}
1701
1702 #--------------------------------------------------------------------------
1703 # The public methods which TarFile provides:
1704
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       archive is marked as closed, and a file object owned by this
       TarFile is closed, even if writing the trailing blocks fails.
    """
    if self.closed:
        return

    # Mark as closed first so that a failing write below cannot leave
    # the object in a half-closed state.
    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1723 self.closed = True
1724
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001735
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # To obtain a list of all members, the whole archive has to be
    # scanned first.
    self._load()
    return self.members
1745
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    members = self.getmembers()
    return [member.name for member in members]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001751
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that cannot be archived.
    """
    self._check("aw")

    # A given file object determines the real name of the file.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: strip the drive, convert backward slashes
    # to forward slashes and turn absolute paths into relative ones.
    if arcname is None:
        arcname = name
    drv, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Create the TarInfo object that is filled in below.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Stat the file. os.lstat is used where available unless symlinks
    # shall be resolved; a file object is stat'ed via its descriptor.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)

    linkname = ""
    file_mode = st.st_mode

    if stat.S_ISREG(file_mode):
        inode = (st.st_ino, st.st_dev)
        if not self.dereference and st.st_nlink > 1 and \
                inode in self.inodes and arcname != self.inodes[inode]:
            # A hardlink to a file that has been archived already.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            member_type = REGTYPE
            # Remember the inode only if it is valid.
            # For win32 it is always 0.
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(file_mode):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(file_mode):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(file_mode):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(file_mode):
        member_type = CHRTYPE
    elif stat.S_ISBLK(file_mode):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything the stat result provides into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = file_mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname

    # Resolve user and group names where the platform supports it;
    # unknown ids leave the TarInfo defaults untouched.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE) and \
            hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(st.st_rdev)
        tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1849
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            print(stat.filemode(member.mode), end=' ')
            owner = member.uname or member.uid
            group = member.gname or member.gid
            print("%s/%s" % (owner, group), end=' ')
            if member.ischr() or member.isblk():
                # Devices show "major,minor" instead of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            timestamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % timestamp, end=' ')

        # Directories are marked with a trailing slash.
        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001878
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames (deprecated in favour of `filter').
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are the only members that are stored with data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else consists of a header only; a directory is
    # followed by its entries unless recursion is switched off.
    self.addfile(tarinfo)
    if tarinfo.isdir() and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, exclude, filter=filter)
1939
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object stays untouched.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last block
    # with NULs up to a full BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001965
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    pending_dirs = []

    if members is None:
        members = self

    for member in members:
        if member.isdir():
            # Extract directories with a safe mode; the original
            # TarInfo is kept so its attributes can be applied later.
            pending_dirs.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(member, path, set_attrs=not member.isdir())

    # Reverse sort directories.
    pending_dirs.sort(key=lambda entry: entry.name)
    pending_dirs.reverse()

    # Set correct owner, mtime and filemode on directories.
    for member in pending_dirs:
        dirpath = os.path.join(path, member.name)
        try:
            self.chown(member, dirpath)
            self.utime(member, dirpath)
            self.chmod(member, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2003
def extract(self, member, path="", set_attrs=True):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'. File attributes (owner,
       mtime, mode) are set unless `set_attrs' is False.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs)
    except EnvironmentError as e:
        # With errorlevel 0 the error is only reported as a debug message.
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        # ExtractErrors are only passed on with errorlevel 2 and above.
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2038
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file or a
       link, an io.BufferedReader object is returned. Otherwise, None is
       returned.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Members with unknown types are treated as regular files.
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2069
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
    """Extract the TarInfo object tarinfo to a physical file called
    targetpath.

    Missing parent directories of targetpath are created first. The
    member is then materialised by the make*() method that matches its
    type. Unless `set_attrs' is false, owner is applied afterwards, and
    mode and modification time are applied too for everything except
    symbolic links.
    """
    # Build the destination pathname: drop trailing slashes and replace
    # forward slashes with the platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create all upper directories. Directories that are not part of
    # the archive get default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Pick the method that creates this kind of member, then call it.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002112
#--------------------------------------------------------------------------
# Below are the different file methods. They are called via
# _extract_member() when extract() is called. They can be replaced in a
# subclass to implement other functionality.
2117
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath.

    An already existing targetpath is silently accepted. `tarinfo' is
    not consulted here.
    """
    try:
        # Start out with a safe, owner-only mode; the real mode is
        # applied later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except FileExistsError:
        pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002127
def makefile(self, tarinfo, targetpath):
    """Write the data of member tarinfo to a new file called targetpath.

    The data is read from self.fileobj starting at tarinfo.offset_data.
    When tarinfo.sparse is set, each (offset, size) entry is copied to
    its own position in the output. In both cases the output is finally
    cut or extended to exactly tarinfo.size bytes.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    with bltn_open(targetpath, "wb") as out:
        segments = tarinfo.sparse
        if segments is None:
            copyfileobj(archive, out, tarinfo.size)
        else:
            for position, length in segments:
                out.seek(position)
                copyfileobj(archive, out, length)
        # Seek to the logical end before truncating so that a sparse
        # file ends up with the full size even if its tail is a hole.
        out.seek(tarinfo.size)
        out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002142
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type as a regular file at targetpath
    and report that through the debug output.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
2150
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath.

    Raises ExtractError when the os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002158
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called targetpath.

    The node is a block device when tarinfo.isblk() is true and a
    character device otherwise; its device number is built from
    tarinfo.devmajor and tarinfo.devminor.

    Raises ExtractError when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    node_type = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | node_type, device)
2173
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called targetpath.

    If the link cannot be created (platform limitation, signalled by
    symlink_exception), the referenced member is extracted from the
    archive to targetpath instead, i.e. a copy is made.

    Raises ExtractError when that fallback cannot find the link target
    inside the archive.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            # The hard link's target is not on disk, so extract the
            # target member under the link's name.
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002196
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

    Nothing happens unless the pwd module is available and the process
    runs with effective uid 0. The group and user names stored in
    tarinfo are looked up first; when a name is unknown on this system
    the numeric tarinfo.gid / tarinfo.uid is used instead.

    Raises ExtractError, chained to the underlying error, when the
    ownership change fails.
    """
    # We have to be root to change ownership.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        g = tarinfo.gid
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        u = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not the file it points to.
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError as e:
        # Keep the original error reachable as __cause__ so callers can
        # still inspect errno and filename.
        raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002218
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Does nothing when the os module has no chmod(). Raises
    ExtractError, chained to the underlying error, when the mode
    cannot be applied.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError as e:
        # Keep the original error reachable as __cause__.
        raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002227
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

    tarinfo.mtime is used for both the access and the modification
    time. Does nothing when the os module has no utime(). Raises
    ExtractError, chained to the underlying error, when the timestamps
    cannot be applied.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError as e:
        # Keep the original error reachable as __cause__.
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002237
2238 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    A member stored in self.firstmember is handed out once, before
    anything is read. Otherwise a header is parsed at self.offset.
    With self.ignore_zeros set, end-of-file and invalid header blocks
    are skipped one BLOCKSIZE at a time. Header errors at offset 0, and
    any SubsequentHeaderError, are raised as ReadError; other header
    errors end the archive quietly.
    """
    self._check("ra")

    first = self.firstmember
    if first is not None:
        self.firstmember = None
        return first

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            raise ReadError(str(e))
        # Either a header was parsed or the archive ends here.
        break

    if tarinfo is None:
        # Nothing more to read: the member list is complete.
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002284
#--------------------------------------------------------------------------
# Little helper methods:
2287
def _getmember(self, name, tarinfo=None, normalize=False):
    """Find an archive member by name, searching from bottom to top.

    If `tarinfo' is given, only members stored before it are
    considered. With `normalize' set, both `name' and the member names
    are passed through os.path.normpath() before they are compared.
    Returns None when nothing matches.
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

    for candidate in reversed(candidates):
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)
        if name == candidate_name:
            return candidate

    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002310
def _load(self):
    """Read through the entire archive file and look for readable
    members, then mark the TarFile as fully loaded.
    """
    # next() returns None once no further member is available.
    while self.next() is not None:
        pass
    self._loaded = True
2320
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
    corresponds to TarFile's mode.

    `mode' is a string of acceptable mode characters; None skips the
    mode check. Raises IOError if the TarFile is closed or self.mode
    is not contained in `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None or self.mode in mode:
        return
    raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002329
def _find_link_target(self, tarinfo):
    """Find the target member of a symlink or hardlink member in the
    archive.

    Raises KeyError when no member matches the link name.
    """
    if tarinfo.issym():
        # A symlink's name is taken relative to the directory of the
        # link itself. Always search the entire archive.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(part for part in parts if part)
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2348
def __iter__(self):
    """Provide an iterator object.

    Once every member has been read, the member list itself is
    iterated; until then a TarIter is returned instead.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2356
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

    `msg' is printed only when self.debug is at least `level'.
    """
    if self.debug >= level:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002362
def __enter__(self):
    """Enter a `with' block: verify the TarFile is still open and
    return it.
    """
    self._check()
    return self
2366
def __exit__(self, type, value, traceback):
    """Leave a `with' block.

    Without an exception the TarFile is closed normally. After an
    exception only the underlying file object is closed, and only if
    self._extfileobj is false, before the TarFile is marked closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002376# class TarFile
2377
class TarIter:
    """Iterator Class.

    for tarinfo in TarFile(...):
        suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object that walks the members of
        tarfile, starting with the first one.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
        When all members have been read, set TarFile as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Members that
        # are already in the list are therefore served by index.
        tar = self.tarfile

        if self.index == 0 and tar.firstmember is not None:
            tarinfo = tar.next()
        elif self.index < len(tar.members):
            tarinfo = tar.members[self.index]
        elif tar._loaded:
            raise StopIteration
        else:
            tarinfo = tar.next()
            if not tarinfo:
                tar._loaded = True
                raise StopIteration

        self.index += 1
        return tarinfo
2415
#--------------------
# exported functions
#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.

    The archive is opened and closed again right away; a TarError
    raised while doing so means the file cannot be handled.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2429
# Keep a reference to the builtin open() before the name is rebound
# below; makefile() writes extracted data through bltn_open.
bltn_open = open
# From here on the module-level name `open' refers to TarFile.open.
open = TarFile.open