blob: 8dd4c0f82dbc6a7dc603efbabf096970b823e00d [file] [log] [blame]
#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
41import sys
42import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020043import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000044import shutil
45import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000046import time
47import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000048import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000049import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000050
51try:
52 import grp, pwd
53except ImportError:
54 grp = pwd = None
55
# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
    # Where the WindowsError name exists, catch it as well: error 1314 is
    # raised if the caller does not hold the SeCreateSymbolicLinkPrivilege
    # privilege.
    symlink_exception = symlink_exception + (WindowsError,)
except NameError:
    # WindowsError is not defined on this platform; keep the base tuple.
    pass
64
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000065# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
Georg Brandl1a3284e2007-12-02 09:40:06 +000068from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000069
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# A tar header stores a 6-byte magic field directly followed by a 2-byte
# version field; the two are matched together as one 8-byte signature, so
# both constants below must be exactly 8 bytes long.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string ("ustar", 2 spaces, NUL)
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string ("ustar", NUL, "00")

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000105
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# File types that tarfile supports: the standard member types followed
# by the GNU-specific ones.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
) + GNU_TYPES

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, mapped to the type used to
# convert them; all other fields are treated as strings.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
141
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Permission bits: owner, then group, then other.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000165
166#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000167# initialization
168#---------------------------------------------------------
Victor Stinner0f35e2c2010-06-11 23:46:47 +0000169if os.name in ("nt", "ce"):
170 ENCODING = "utf-8"
171else:
172 ENCODING = sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000173
174#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000175# Some useful functions
176#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000177
def stn(s, length, encoding, errors):
    """Encode the string s into a bytes field of exactly length bytes.

    s is encoded with the given encoding and error handler. The result
    is cut off after length bytes when it is too long and padded with
    NUL bytes when it is too short.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long input gets no padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000183
def nts(s, encoding, errors):
    """Decode a null-terminated bytes object into a string.

    Only the part of s before the first NUL byte is decoded; if s holds
    no NUL byte it is decoded in full.
    """
    text, _sep, _rest = s.partition(b"\0")
    return text.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000191
def nti(s):
    """Convert a number field to a python number.

    There are two possible encodings for a number field (see itn()):
    a leading 0o200 or 0o377 byte marks a big-endian base-256 value
    (0o377 meaning negative); anything else is parsed as a
    null-terminated string of octal digits, an empty string counting
    as zero. Raises InvalidHeaderError if the octal form cannot be
    parsed.
    """
    marker = s[0]
    if marker not in (0o200, 0o377):
        try:
            return int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256 form: fold the bytes after the marker into one integer.
    n = 0
    for byte in s[1:]:
        n = (n << 8) + byte
    if marker == 0o377:
        n = -(256 ** (len(s) - 1) - n)
    return n
210
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

    POSIX 1003.1-1988 requires numbers to be encoded as a string of
    octal digits followed by a null-byte, which allows values up to
    (8**(digits-1))-1. GNU tar allows storing numbers greater than
    that if necessary: a leading 0o200 (positive) or 0o377 (negative)
    byte marks this encoding, and the following digits-1 bytes are a
    big-endian base-256 representation, allowing values up to
    (256**(digits-1))-1.

    Raises ValueError if n fits neither encoding, or needs the base-256
    form while format is not GNU_FORMAT.
    """
    width = digits - 1
    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format != GNU_FORMAT or not (-256 ** width <= n < 256 ** width):
        raise ValueError("overflow in number field")

    if n >= 0:
        marker = 0o200
    else:
        marker = 0o377
        n = 256 ** digits + n

    # Collect the low bytes least-significant first, then flip the list so
    # the marker leads and the payload is big-endian.
    octets = []
    for _ in range(width):
        octets.append(n & 0o377)
        n >>= 8
    octets.append(marker)
    octets.reverse()
    return bytearray(octets)
238
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the 512-byte header are summed up except for the
    chksum field (bytes 148-155), which is treated as if it was filled
    with spaces. According to the GNU tar sources, some tars (Sun and
    NeXT) calculate chksum with signed char, which gives a different
    result if there are bytes in the buffer with the high bit set, so
    both variants are computed.
    """
    before = buf[:148]
    after = buf[156:512]
    # The eight skipped bytes count as spaces: 8 * 0x20 == 256.
    spaces = 256
    unsigned_chksum = (spaces
                       + sum(struct.unpack("148B", before))
                       + sum(struct.unpack("356B", after)))
    signed_chksum = (spaces
                     + sum(struct.unpack("148b", before))
                     + sum(struct.unpack("356b", after)))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000251
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire remaining content of src is copied.
    Otherwise the data is moved in chunks of at most 16 KiB and an
    IOError is raised as soon as src delivers fewer bytes than asked
    for.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def _transfer(count):
        # Move exactly count bytes, failing on a short read.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_chunks, tail = divmod(length, BUFSIZE)
    for _ in range(full_chunks):
        _transfer(BUFSIZE)
    if tail != 0:
        _transfer(tail)
    return
276
# Lookup table for filemode(): one inner tuple per output character, each
# listing (bit mask, character) candidates in the order they are tried.
filemode_table = (
    # file type
    (
        (S_IFLNK, "l"),
        (S_IFREG, "-"),
        (S_IFBLK, "b"),
        (S_IFDIR, "d"),
        (S_IFCHR, "c"),
        (S_IFIFO, "p"),
    ),

    # owner: read, write, execute/set-uid
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    (
        (TUEXEC | TSUID, "s"),
        (TSUID, "S"),
        (TUEXEC, "x"),
    ),

    # group: read, write, execute/set-gid
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    (
        (TGEXEC | TSGID, "s"),
        (TSGID, "S"),
        (TGEXEC, "x"),
    ),

    # other: read, write, execute/TSVTX
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    (
        (TOEXEC | TSVTX, "t"),
        (TSVTX, "T"),
        (TOEXEC, "x"),
    ),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000303
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       For every entry of filemode_table the character of the first
       mask that is fully set in mode is used; "-" is used when none
       of the entry's masks match.
    """
    return "".join(
        next((char for bit, char in table if mode & bit == bit), "-")
        for table in filemode_table
    )
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000318
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000352
353#---------------------------
354# internal stream interface
355#---------------------------
class _LowLevelFile:
    """Minimal file object built directly on os-level file descriptors.

    It supports reading and writing and is used instead of a regular
    file object for streaming access.
    """

    def __init__(self, name, mode):
        """Open the file name; mode must be "r" or "w" (KeyError otherwise)."""
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is only defined on some platforms; add it where present.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return the result of a single os.read() of up to size bytes."""
        return os.read(self.fd, size)

    def write(self, s):
        """Pass the bytes s to a single os.write() call."""
        os.write(self.fd, s)
379
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened through _LowLevelFile when
                       fileobj is None, and stored in the gzip header
                       when writing with comptype "gz".
           mode     -- "r" for reading or "w" for writing.
           comptype -- "tar" (no compression), "gz", "bz2", "xz", or
                       "*" to detect the compression from the data.
           fileobj  -- stream-like object to use, or None.
           bufsize  -- number of bytes moved to or from fileobj at once.

           Raises CompressionError if the compression type is unknown or
           its module is unavailable. On any failure a file opened here
           is closed again before the exception propagates.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = IOError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Deliberately bare: whatever went wrong, release a file that
            # was opened here and re-raise the original exception.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before assigning it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write the bytes s to the stream, compressing them unless
           comptype is "tar".
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Append s to the internal buffer and hand complete blocks of
           bufsize bytes to the underlying file object.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.

           In write mode the compressor is flushed and the remaining
           buffer (plus the gzip trailer for comptype "gz") is written.
           A file object opened by the constructor is closed even if
           that final write fails.
        """
        if self.closed:
            return

        # Mark the stream closed up front so that a failure below is not
        # retried from __del__.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC-32 and length of the uncompressed
                    # data, both masked to 32 bits so struct.pack() never
                    # sees an out-of-range value.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           The gzip header is consumed with raw reads so that the
           decompressor only ever sees the deflate data that follows.
           Raises ReadError if the gzip magic is missing and
           CompressionError for a compression method other than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # FEXTRA: skip the extra field. This must be a raw read;
            # self.read() would push header bytes through the decompressor
            # and advance the stream position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: skip the NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: skip the NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: skip the two-byte header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raises StreamError if pos lies before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes objects, so the separator must be bytes.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream, decompressing them unless
           comptype is "tar". Fewer bytes are returned at EOF.

           Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
629
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first block of the wrapped file object is read ahead so
       that its leading bytes can be inspected.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        # Read ahead one block; getcomptype() inspects it.
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead, ignoring size.

           This also rebinds self.read to the wrapped object's read(),
           so every later call goes to it directly.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type from the start of the read-ahead
           block: "gz", "bz2", "xz", or "tar" if nothing matches.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
656
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000657#------------------------
658# Extraction file object
659#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.

       The part starts at offset in the underlying file object and
       appears size bytes long. With blockinfo, a list of
       (offset, size) pairs, only those ranges are backed by stored
       data; every other range reads as NUL bytes.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            # No block information: one data block covers everything.
            blockinfo = [(0, size)]

        # Build the map of extents. Each entry is
        # (is_data, start, stop, realpos): start/stop are positions in
        # this file, realpos is where a data extent begins in fileobj
        # (None for zero-filled extents).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for blk_start, blk_len in blockinfo:
            blk_stop = blk_start + blk_len
            if blk_start > lastpos:
                self.map.append((False, lastpos, blk_start, None))
            self.map.append((True, blk_start, blk_stop, realpos))
            realpos += blk_len
            lastpos = blk_stop
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file and return the new position.

           The result is clamped to the range 0..size. Raises
           ValueError for an unknown whence.
        """
        if whence == io.SEEK_SET:
            new_pos = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            moved = self.position + position
            new_pos = max(moved, 0) if position < 0 else min(moved, self.size)
        elif whence == io.SEEK_END:
            new_pos = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = new_pos
        return new_pos

    def read(self, size=None):
        """Read data from the file.

           At most size bytes are returned, or everything up to the
           end of the file if size is None.
        """
        available = self.size - self.position
        size = available if size is None else min(size, available)

        parts = []
        while size > 0:
            # Find the extent holding the current position, starting at
            # the one used last and wrapping around at the end of the map.
            while True:
                is_data, start, stop, realpos = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                self.map_index = (self.map_index + 1) % len(self.map)
            length = min(size, stop - self.position)
            if is_data:
                self.fileobj.seek(realpos + (self.position - start))
                parts.append(self.fileobj.read(length))
            else:
                parts.append(NUL * length)
            size -= length
            self.position += length
        return b"".join(parts)

    def readinto(self, b):
        """Read up to len(b) bytes into b and return the number read."""
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        """Mark this object as closed; the underlying file object is
           left untouched.
        """
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000760
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        """Wrap the member described by tarinfo.

           A _FileInFile is built from tarfile.fileobj together with the
           member's offset_data, size and sparse attributes, and is used
           as the raw stream of this buffered reader.
        """
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000768
769#------------------
770# Exported Classes
771#------------------
772class TarInfo(object):
773 """Informational class which holds the details about an
774 archive member given by a tar header block.
775 TarInfo objects are returned by TarFile.getmember(),
776 TarFile.getmembers() and TarFile.gettarinfo() and are
777 usually created internally.
778 """
779
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000780 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
781 "chksum", "type", "linkname", "uname", "gname",
782 "devmajor", "devminor",
783 "offset", "offset_data", "pax_headers", "sparse",
784 "tarfile", "_sparse_structs", "_link_target")
785
def __init__(self, name=""):
    """Set up a TarInfo whose fields carry their default values.

    name, when supplied, is stored as the member name.
    """
    # Textual header fields.
    self.name = name                    # member name
    self.linkname = ""                  # link name
    self.uname = self.gname = ""        # user name, group name

    # Type and permission bits.
    self.type = REGTYPE                 # member type
    self.mode = 0o644                   # file permissions

    # Numeric header fields.
    self.uid = self.gid = 0             # user id, group id
    self.size = self.mtime = 0          # file size, modification time
    self.chksum = 0                     # header checksum
    self.devmajor = self.devminor = 0   # device major/minor number

    # Positions inside the archive: where the header starts and
    # where the file's data starts.
    self.offset = self.offset_data = 0

    self.sparse = None                  # sparse member information
    self.pax_headers = {}               # pax header information
809
# Pax headers use the keywords "path" and "linkpath" for what this
# class stores as "name" and "linkname"; the two properties below
# expose the attributes under the pax names as well.
def _getpath(self):
    """Return the member name."""
    return self.name

def _setpath(self, name):
    """Store name as the member name."""
    self.name = name

path = property(_getpath, _setpath)

def _getlinkpath(self):
    """Return the link name."""
    return self.linkname

def _setlinkpath(self, linkname):
    """Store linkname as the link name."""
    self.linkname = linkname

linkpath = property(_getlinkpath, _setlinkpath)
823
def __repr__(self):
    """Return a string showing the class name, member name and object id."""
    class_name = self.__class__.__name__
    return "<%s %r at %#x>" % (class_name, self.name, id(self))
826
def get_info(self):
    """Build a dictionary holding the header attributes of this member.

    The mode is masked with 0o7777, and the name of a DIRTYPE member
    gets a trailing slash appended if it does not end with one.
    """
    member_name = self.name
    if self.type == DIRTYPE and not member_name.endswith("/"):
        member_name += "/"

    return {
        "name": member_name,
        "mode": self.mode & 0o7777,
        "uid": self.uid,
        "gid": self.gid,
        "size": self.size,
        "mtime": self.mtime,
        "chksum": self.chksum,
        "type": self.type,
        "linkname": self.linkname,
        "uname": self.uname,
        "gname": self.gname,
        "devmajor": self.devmajor,
        "devminor": self.devminor,
    }
850
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Serialize this member's header in the requested archive format.

    format selects the ustar, GNU or pax header builder; any other
    value raises ValueError.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding)
    raise ValueError("invalid format")
864
def create_ustar_header(self, info, encoding, errors):
    """Build a ustar header block from the info dictionary.

    Raises ValueError if the link name exceeds LENGTH_LINK. A name
    longer than LENGTH_NAME is split into a prefix and a name part.
    """
    info["magic"] = POSIX_MAGIC

    link_target = info["linkname"]
    if len(link_target) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    full_name = info["name"]
    if len(full_name) > LENGTH_NAME:
        prefix, short_name = self._posix_split_name(full_name)
        info["prefix"] = prefix
        info["name"] = short_name

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000877
def create_gnu_header(self, info, encoding, errors):
    """Build a GNU header sequence from the info dictionary.

    A link name or name that is longer than its header field is
    emitted first as a GNU long-link / long-name sequence, followed
    by the regular header block.
    """
    info["magic"] = GNU_MAGIC

    pieces = []
    if len(info["linkname"]) > LENGTH_LINK:
        pieces.append(self._create_gnu_long_header(
            info["linkname"], GNUTYPE_LONGLINK, encoding, errors))

    if len(info["name"]) > LENGTH_NAME:
        pieces.append(self._create_gnu_long_header(
            info["name"], GNUTYPE_LONGNAME, encoding, errors))

    pieces.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(pieces)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000891
def create_pax_header(self, info, encoding):
    """Build a ustar header block, preceded by a pax extended header
    sequence when some value does not fit into the ustar block.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header when they are not pure
    # ASCII or when they are longer than their header field.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, pax_key, limit in string_fields:
        if pax_key in pax_headers:
            # An entry already present in the pax header has priority.
            continue

        text = info[field]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            needs_record = True
        else:
            needs_record = len(text) > limit

        if needs_record:
            pax_headers[pax_key] = text

    # Number fields go into the pax header when they are floats or
    # fall outside the range of their octal header field.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for field, digits in number_fields:
        if field not in pax_headers:
            number = info[field]
            in_range = 0 <= number < 8 ** (digits - 1)
            if in_range and not isinstance(number, float):
                continue
            pax_headers[field] = str(number)
        # The pax record carries the value; zero the header field to
        # avoid overflow.
        info[field] = 0

    # Prepend a pax extended header only if there is something to store.
    prelude = b""
    if pax_headers:
        prelude = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)

    return prelude + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000940
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Build a pax global header sequence holding pax_headers."""
    header = cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
    return header
Guido van Rossumd8faa362007-04-27 19:54:29 +0000946
def _posix_split_name(self, name):
    """Split an over-long name at a slash into (prefix, name).

    The split point is the last slash within the first
    LENGTH_PREFIX + 1 characters. Raises ValueError if that leaves an
    empty prefix or a name part longer than LENGTH_NAME.
    """
    window = name[:LENGTH_PREFIX + 1]
    slash = window.rfind("/")

    # Without a slash in the window there is no usable prefix.
    prefix = window[:slash] if slash >= 0 else ""
    remainder = name[slash + 1:]

    if not prefix or len(remainder) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, remainder
961
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block. info is a dictionary with file
    information, format must be one of the *_FORMAT constants.

    Keys missing from info fall back to an empty string, zero,
    REGTYPE or POSIX_MAGIC. String fields are encoded with encoding
    and errors. The joined fields are packed to BLOCKSIZE bytes and
    the checksum computed over that block is written into the
    checksum field.
    """
    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # The checksum field is eight bytes wide (bytes 148-155 of the
        # block, the type byte follows at 156). It must be filled with
        # eight spaces here so that every later field lands on its
        # offset; the width is spelled out to keep it unambiguous.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", ""), 32, encoding, errors),
        stn(info.get("gname", ""), 32, encoding, errors),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # struct.pack pads (or truncates) the joined fields to one block.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first seven bytes of the checksum field with six
    # octal digits and a NUL; the eighth byte keeps its space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
989
@staticmethod
def _create_payload(payload):
    """Pad payload with NUL up to the next multiple of BLOCKSIZE
    and return it; an already aligned payload is returned as is.
    """
    overhang = len(payload) % BLOCKSIZE
    if overhang > 0:
        payload += NUL * (BLOCKSIZE - overhang)
    return payload
999
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Build the header block and payload blocks of a GNU long-name
    or long-link sequence (selected by type) that carries name.
    """
    # The payload is the encoded name followed by a terminating NUL.
    encoded = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(encoded),
        "magic": GNU_MAGIC,
    }

    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(encoded)
1016
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Build a pax extended or global header sequence (selected by
    type) from the keyword/value pairs in pax_headers. The values
    must be strings.
    """
    # If any value cannot be encoded as strict UTF-8 (for instance
    # because it contains surrogates), the whole header is written
    # with hdrcharset=BINARY.
    binary = False
    for value in pax_headers.values():
        try:
            value.encode("utf-8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    chunks = []
    if binary:
        # The hdrcharset record comes first in the header.
        chunks.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        key_bytes = keyword.encode("utf-8")
        if binary:
            # Recover the original bytes of the value; this only works
            # if encoding matches the one the string was decoded with.
            value_bytes = value.encode(encoding, "surrogateescape")
        else:
            value_bytes = value.encode("utf-8")

        # A record's length counts its own decimal length prefix, so
        # iterate until the total stops changing.
        base = len(key_bytes) + len(value_bytes) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate

        chunks.append(bytes(str(total), "ascii") + b" " + key_bytes +
                      b"=" + value_bytes + b"\n")

    records = b"".join(chunks)

    # Like star, use the fixed name "././@PaxHeader" rather than the
    # one POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Header block first, then the padded record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1067
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Parse one header block given as a bytes object and return a
    new TarInfo filled from it.

    Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError or
    InvalidHeaderError when buf is empty, is not BLOCKSIZE long,
    contains BLOCKSIZE occurrences of NUL, or has a checksum that
    does not match.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    def text(start, stop):
        # Decode the string field stored in buf[start:stop].
        return nts(buf[start:stop], encoding, errors)

    def number(start, stop):
        # Decode the number field stored in buf[start:stop].
        return nti(buf[start:stop])

    obj = cls()
    obj.name = text(0, 100)
    obj.mode = number(100, 108)
    obj.uid = number(108, 116)
    obj.gid = number(116, 124)
    obj.size = number(124, 136)
    obj.mtime = number(136, 148)
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = text(157, 257)
    obj.uname = text(265, 297)
    obj.gname = text(297, 329)
    obj.devmajor = number(329, 337)
    obj.devminor = number(337, 345)
    prefix = text(345, 500)

    # In the old V7 format a directory is a regular file whose name
    # ends with a slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # The old GNU sparse format stores up to 4 sparse structures in
    # otherwise unused space of the block. Keep them for later
    # processing in _proc_sparse().
    if obj.type == GNUTYPE_SPARSE:
        structs = []
        for pos in range(386, 386 + 4 * 24, 24):
            try:
                offset = number(pos, pos + 12)
                numbytes = number(pos + 12, pos + 24)
            except ValueError:
                break
            structs.append((offset, numbytes))
        isextended = bool(buf[482])
        origsize = number(483, 495)
        obj._sparse_structs = (structs, isextended, origsize)

    # Directories lose their trailing slashes.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Put a ustar prefix back in front of the name.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1130
@classmethod
def fromtarfile(cls, tarfile):
    """Read the next header block from tarfile and return the
    TarInfo object that results from processing it.
    """
    stream = tarfile.fileobj
    block = stream.read(BLOCKSIZE)
    member = cls.frombuf(block, tarfile.encoding, tarfile.errors)
    # The header started one block before the current position.
    member.offset = stream.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001140
Guido van Rossumd8faa362007-04-27 19:54:29 +00001141 #--------------------------------------------------------------------------
1142 # The following are methods that are called depending on the type of a
1143 # member. The entry point is _proc_member() which can be overridden in a
1144 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1145 # implement the following
1146 # operations:
1147 # 1. Set self.offset_data to the position where the data blocks begin,
1148 # if there is data that follows.
1149 # 2. Set tarfile.offset to the position where the next member's header will
1150 # begin.
1151 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Pick the _proc_*() method matching this member's type,
    call it with tarfile and return its result.
    """
    kind = self.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self._proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self._proc_sparse
    elif kind in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        handler = self._proc_pax
    else:
        handler = self._proc_builtin
    return handler(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001164
def _proc_builtin(self, tarfile):
    """Handle a member of a builtin type; a type that is not in
    SUPPORTED_TYPES is handled like a regular file.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    # Only regular files and unsupported types are followed by data
    # blocks that have to be skipped.
    has_data = self.isreg() or self.type not in SUPPORTED_TYPES
    if has_data:
        tarfile.offset = data_start + self._block(self.size)
    else:
        tarfile.offset = data_start

    # Apply the global pax header information stored on tarfile.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1181
def _proc_gnulong(self, tarfile):
    """Handle a GNU long-name or long-link member: read its data
    blocks and attach their content to the member that follows.
    """
    payload = tarfile.fileobj.read(self._block(self.size))

    # The member the long name belongs to comes right after.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # The following member takes over this header's offset and gets
    # its name or link name replaced by the long one.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(payload, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(payload, tarfile.encoding, tarfile.errors)

    return member
1203
def _proc_sparse(self, tarfile):
    """Handle a GNU sparse header together with its extension
    blocks, if there are any.
    """
    # frombuf() stored the structures found in the header block.
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    stream = tarfile.fileobj

    # Each extension block holds up to 21 further structures of 24
    # bytes and a flag telling whether another block follows.
    while isextended:
        block = stream.read(BLOCKSIZE)
        for pos in range(0, 21 * 24, 24):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset and numbytes:
                structs.append((offset, numbytes))
        isextended = bool(block[504])
    self.sparse = structs

    data_start = stream.tell()
    self.offset_data = data_start
    # The next header is located using the size from the header
    # block, which is replaced by origsize only afterwards.
    tarfile.offset = data_start + self._block(self.size)
    self.size = origsize
    return self
1231
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
    POSIX.1-2008.

    Reads the pax records that follow this header block, stores them
    in the applicable header dictionary, then reads the following
    header and returns the TarInfo object created from it. For an
    extended header the records are applied to that object.

    Raises InvalidHeaderError if a record announces a length of zero
    and SubsequentHeaderError if no valid header follows.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Check if the pax header contains a hdrcharset field. This tells us
    # the encoding of the path, linkpath, uname and gname fields. Normally,
    # these fields are UTF-8 encoded but since POSIX.1-2008 tar
    # implementations are allowed to store them as raw binary strings if
    # the translation to UTF-8 fails.
    match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if match is not None:
        pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

    # For the time being, we don't care about anything other than "BINARY".
    # The only other value that is currently allowed by the standard is
    # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
    hdrcharset = pax_headers.get("hdrcharset")
    if hdrcharset == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf-8"

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A record of length zero would leave pos unchanged, so
            # the loop would match the same record forever. Reject
            # the header instead of hanging on a malformed archive.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        # Normally, we could just use "utf-8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(value, encoding, tarfile.encoding,
                    tarfile.errors)
        else:
            value = self._decode_pax_field(value, "utf-8", "utf-8",
                    tarfile.errors)

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Process GNU sparse information.
    if "GNU.sparse.map" in pax_headers:
        # GNU extended sparse format version 0.1.
        self._proc_gnusparse_01(next, pax_headers)

    elif "GNU.sparse.size" in pax_headers:
        # GNU extended sparse format version 0.0.
        self._proc_gnusparse_00(next, pax_headers, buf)

    elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
        # GNU extended sparse format version 1.0.
        self._proc_gnusparse_10(next, pax_headers, tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1333
def _proc_gnusparse_00(self, next, pax_headers, buf):
    """Handle GNU extended sparse format 0.0: collect the offset and
    numbytes records found in buf and store them on next as a list
    of (offset, numbytes) pairs.
    """
    offsets = [
        int(found.group(1))
        for found in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf)
    ]
    sizes = [
        int(found.group(1))
        for found in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf)
    ]
    next.sparse = list(zip(offsets, sizes))
1344
def _proc_gnusparse_01(self, next, pax_headers):
    """Handle GNU extended sparse format 0.1: turn the comma
    separated numbers of the "GNU.sparse.map" record into a list of
    pairs and store it on next.
    """
    numbers = list(map(int, pax_headers["GNU.sparse.map"].split(",")))
    # Even positions and odd positions are paired up in order.
    next.sparse = list(zip(numbers[0::2], numbers[1::2]))
1350
def _proc_gnusparse_10(self, next, pax_headers, tarfile):
    """Handle GNU extended sparse format 1.0: read the newline
    separated sparse map from the archive and store it on next,
    together with the position at which reading stopped.
    """
    stream = tarfile.fileobj

    # The first line holds the number of pairs that follow.
    buf = stream.read(BLOCKSIZE)
    count_line, buf = buf.split(b"\n", 1)
    wanted = int(count_line) * 2

    numbers = []
    while len(numbers) < wanted:
        if b"\n" not in buf:
            # No complete line left; fetch another block.
            buf += stream.read(BLOCKSIZE)
        token, buf = buf.split(b"\n", 1)
        numbers.append(int(token))

    next.offset_data = stream.tell()
    next.sparse = list(zip(numbers[::2], numbers[1::2]))
1366
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite fields of this object with the values found in
    pax_headers and keep a copy of pax_headers on the object.
    """
    for keyword, value in pax_headers.items():
        if keyword == "GNU.sparse.name":
            self.path = value
            continue

        if keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
            self.size = int(value)
            continue

        if keyword not in PAX_FIELDS:
            continue

        if keyword in PAX_NUMBER_FIELDS:
            # A number that cannot be converted is stored as 0.
            try:
                value = PAX_NUMBER_FIELDS[keyword](value)
            except ValueError:
                value = 0
        if keyword == "path":
            value = value.rstrip("/")
        setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001389
def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
    """Decode value strictly with encoding; if that fails, decode it
    with fallback_encoding and fallback_errors instead.
    """
    try:
        text = value.decode(encoding, "strict")
    except UnicodeDecodeError:
        text = value.decode(fallback_encoding, fallback_errors)
    return text
1397
def _block(self, count):
    """Return count rounded up to a multiple of BLOCKSIZE,
    e.g. _block(834) => 1024.
    """
    whole, rest = divmod(count, BLOCKSIZE)
    # A partial block still takes up a whole one.
    needed = whole + 1 if rest else whole
    return needed * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001406
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES

def isfile(self):
    """Return the result of isreg()."""
    return self.isreg()

def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    return self.type == DIRTYPE

def issym(self):
    """Return True if the member's type is SYMTYPE."""
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if sparse information is set on the member."""
    return self.sparse is not None

def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1427# class TarInfo
1428
1429class TarFile(object):
1430 """The TarFile Class provides an interface to tar archives.
1431 """
1432
# Class-level defaults.  __init__() overrides each of them on the instance
# when the corresponding constructor argument is given.

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, errors raised while extracting are only
                            # reported as debug messages.  If > 0,
                            # EnvironmentError is passed to the caller; if
                            # > 1, ExtractError is passed on as well (see
                            # extract()).

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.
                            # __init__() always replaces it with its
                            # `errors' argument.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001454
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining arguments override the class attributes of the same
       name when they are not None.  `pax_headers' is only used if the
       resulting format is PAX_FORMAT.

       Raises ValueError if `mode' is not exactly 'r', 'a' or 'w', and
       ReadError if an invalid header is met while seeking to the end of
       an archive opened for appending.
    """
    # Validate against the table itself.  A substring test such as
    # `mode not in "raw"' accepts the empty string, which then fails
    # with a KeyError in the lookup instead of the documented ValueError.
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist before next() is called.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Whatever went wrong (including KeyboardInterrupt), do not leave
        # a file object behind that was opened here; then re-raise.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001550
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001551 #--------------------------------------------------------------------------
1552 # Below are the classmethods which act as alternate constructors to the
1553 # TarFile class. The open() method is the only one that is needed for
1554 # public use; it is the "super"-constructor and is able to select an
1555 # adequate "sub"-constructor for a particular compression using the mapping
1556 # from OPEN_METH.
1557 #
1558 # This concept allows one to subclass TarFile without losing the comfort of
1559 # the super-constructor. A sub-constructor is registered and made available
1560 # by adding it to the mapping in OPEN_METH.
1561
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener could read
       the archive.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so that the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: `filemode not in "rw"' is a substring test
        # and would let the two-character mode "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream was created here, so the TarFile has to close it.
        t._extfileobj = False
        return t

    # Exact comparison: `mode in "aw"' is a substring test that is also
    # true for "" and "aw".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001641
Guido van Rossum75b64e62005-01-16 00:16:11 +00001642 @classmethod
Guido van Rossumd8faa362007-04-27 19:54:29 +00001643 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001644 """Open uncompressed tar archive name for reading or writing.
1645 """
1646 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001647 raise ValueError("mode must be 'r', 'a' or 'w'")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001648 return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001649
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError unless `mode' is exactly 'r' or 'w',
       CompressionError if the gzip module cannot be used and ReadError
       if the data cannot be processed as a gzip file.
    """
    # Exact comparison; a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        gzfile = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except IOError:
        if fileobj is None:
            # The file could not be opened by name: report the real error.
            raise
        raise ReadError("not a gzip file")

    try:
        t = cls.taropen(name, mode, gzfile, **kwargs)
    except IOError:
        gzfile.close()
        raise ReadError("not a gzip file")
    except:
        gzfile.close()
        raise
    # The GzipFile wrapper was created here, so the TarFile must close it;
    # otherwise the gzip trailer is not written when the TarFile is closed.
    # Closing the wrapper does not close a caller-supplied `fileobj'.
    t._extfileobj = False
    return t
1680
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError unless `mode' is exactly 'r' or 'w',
       CompressionError if the bz2 module is not available and ReadError
       if the data cannot be processed as a bzip2 file.
    """
    # Exact comparison; a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    fileobj = bz2.BZ2File(filename=name if fileobj is None else None,
            mode=mode, fileobj=fileobj, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure must not leak the BZ2File created above.
        fileobj.close()
        raise
    # The BZ2File wrapper was created here, so the TarFile closes it.
    t._extfileobj = False
    return t
1704
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError unless `mode' is 'r' or 'w', CompressionError
       if the lzma module is not available and ReadError if the data
       cannot be processed as an lzma file.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    fileobj = lzma.LZMAFile(filename=name if fileobj is None else None,
            mode=mode, fileobj=fileobj, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (lzma.LZMAError, EOFError):
        fileobj.close()
        raise ReadError("not an lzma file")
    except:
        # Any other failure must not leak the LZMAFile created above.
        fileobj.close()
        raise
    # The LZMAFile wrapper was created here, so the TarFile closes it.
    t._extfileobj = False
    return t
1728
# All *open() methods are registered here.
# Maps a compression name, as used after ':' in the mode string, to the
# name of the classmethod that opens such an archive.  open() tries the
# entries in iteration order when the compression has to be detected.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}
1736
1737 #--------------------------------------------------------------------------
1738 # The public methods which TarFile provides:
1739
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.  Calling close() on an already closed
       TarFile does nothing.
    """
    if self.closed:
        return

    # Mark the archive closed first and release the file object in a
    # finally clause: if writing the trailer fails, a file object that
    # was opened by this class must still be closed.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1759
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised if the archive has no such member. For a name
       that occurs more than once in the archive, the last occurrence
       is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001770
def getmembers(self):
    """Return the list of TarInfo objects for the members of the archive,
       in the order in which they appear in the archive.
    """
    self._check()
    already_scanned = self._loaded
    if not already_scanned:
        # A complete list requires the whole archive to be scanned once.
        self._load()
    return self.members
1780
def getnames(self):
    """Return the names of the members of the archive as a list, in the
       same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001786
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing either the file `name' or the
       open file object `fileobj' (which is examined with os.fstat on its
       file descriptor). The attributes of the result may be changed
       before it is passed to addfile(). `arcname', if given, is the name
       the file gets inside the archive. None is returned for a file
       type that is not supported.
    """
    self._check("aw")

    # A file object takes precedence: its own name replaces `name'.
    if fileobj is not None:
        name = fileobj.name

    # Derive the name of the member in the archive: drop a drive prefix,
    # use forward slashes only and make absolute paths relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # The object that is going to receive the file's information.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Pick fstat, lstat or stat depending on what was passed in, on the
    # platform and on whether symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        archived_before = (not self.dereference and statres.st_nlink > 1
                and inode in self.inodes and arcname != self.inodes[inode])
        if archived_before:
            # A hardlink to a file that is already in the archive.
            membertype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            membertype = REGTYPE
            # Remember the inode only if it is valid
            # (for win32 it is always 0).
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        membertype = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        membertype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        membertype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        membertype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        membertype = BLKTYPE
    else:
        return None

    # Copy everything the stat result offers into the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if membertype == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = membertype
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    is_device = membertype in (CHRTYPE, BLKTYPE)
    if is_device and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1884
def list(self, verbose=True):
    """Write a table of contents to sys.stdout, one line per member.
       With `verbose' set to False only the member names are printed,
       otherwise the output resembles that of `ls -l'.
    """
    self._check()

    for member in self:
        if verbose:
            owner = member.uname or member.uid
            group = member.gname or member.gid
            print(filemode(member.mode), end=' ')
            print("%s/%s" % (owner, group), end=' ')
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            stamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001913
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded; it is deprecated in
       favour of `filter'. `filter' is a function that expects a TarInfo
       object argument and returns the changed TarInfo object, if it
       returns None the TarInfo object will be excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # The with statement closes the source file even if addfile()
        # raises, e.g. because writing to the archive fails.
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter=filter)

    else:
        self.addfile(tarinfo)
1975
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by the TarInfo object `tarinfo' to the
       archive. When `fileobj' is given, tarinfo.size bytes are read from
       it and stored as the member's data. TarInfo objects can be created
       with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, so that the caller's object is left alone.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Copy the data, if there is any, and pad it to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, leftover = divmod(tarinfo.size, BLOCKSIZE)
        if leftover > 0:
            padding = NUL * (BLOCKSIZE - leftover)
            self.fileobj.write(padding)
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002001
def extractall(self, path=".", members=None):
    """Extract every member of the archive below the directory `path'
       (the current working directory by default). Owner, modification
       time and permissions of directories are set after all members
       have been extracted. `members' is optional and must be a subset
       of the list returned by getmembers().
    """
    pending_dirs = []

    for member in (self if members is None else members):
        if member.isdir():
            # Remember the original and extract a copy with a safe mode;
            # the real attributes are applied further down.
            pending_dirs.append(member)
            safe_copy = copy.copy(member)
            safe_copy.mode = 0o700
            self.extract(safe_copy, path, set_attrs=False)
        else:
            self.extract(member, path, set_attrs=True)

    # Handle the directories in reverse order of their names.
    by_name = sorted(pending_dirs, key=lambda entry: entry.name)

    # Set correct owner, mtime and filemode on directories.
    for entry in reversed(by_name):
        dirpath = os.path.join(path, entry.name)
        try:
            self.chown(entry, dirpath)
            self.utime(entry, dirpath)
            self.chmod(entry, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2039
def extract(self, member, path="", set_attrs=True):
    """Extract one member of the archive, using its full name, below the
       directory `path' (the current working directory by default).
       `member' may be a filename or a TarInfo object. File attributes
       (owner, mtime, mode) are set unless `set_attrs' is False.

       An EnvironmentError is re-raised if errorlevel is greater than 0,
       an ExtractError if errorlevel is greater than 1; otherwise the
       error is only reported as a debug message.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    targetpath = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, targetpath, set_attrs=set_attrs)
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            message = "tarfile: %s" % e.strerror
        else:
            message = "tarfile: %s %r" % (e.strerror, e.filename)
        self._dbg(1, message)
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2074
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may
       be a filename or a TarInfo object. Regular files and members of
       unknown type yield self.fileobject(self, tarinfo); links are
       resolved to their target member first. For members that carry
       no data, None is returned.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Unknown member types are handled like regular files.
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # Directories, devices etc. have no data to read.
        return None

    if isinstance(self.fileobj, _Stream):
        # Following a link means looking up another member, which is
        # refused when the archive is read through a _Stream.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2105
Martin v. Löwis16f344d2010-11-01 21:39:13 +00002106 def _extract_member(self, tarinfo, targetpath, set_attrs=True):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002107 """Extract the TarInfo object tarinfo to a physical
2108 file called targetpath.
2109 """
2110 # Fetch the TarInfo object for the given name
2111 # and build the destination pathname, replacing
2112 # forward slashes to platform specific separators.
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00002113 targetpath = targetpath.rstrip("/")
2114 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002115
2116 # Create all upper directories.
2117 upperdirs = os.path.dirname(targetpath)
2118 if upperdirs and not os.path.exists(upperdirs):
Christian Heimes2202f872008-02-06 14:31:34 +00002119 # Create directories that are not part of the archive with
2120 # default permissions.
Thomas Woutersb2137042007-02-01 18:02:27 +00002121 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002122
2123 if tarinfo.islnk() or tarinfo.issym():
2124 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2125 else:
2126 self._dbg(1, tarinfo.name)
2127
2128 if tarinfo.isreg():
2129 self.makefile(tarinfo, targetpath)
2130 elif tarinfo.isdir():
2131 self.makedir(tarinfo, targetpath)
2132 elif tarinfo.isfifo():
2133 self.makefifo(tarinfo, targetpath)
2134 elif tarinfo.ischr() or tarinfo.isblk():
2135 self.makedev(tarinfo, targetpath)
2136 elif tarinfo.islnk() or tarinfo.issym():
2137 self.makelink(tarinfo, targetpath)
2138 elif tarinfo.type not in SUPPORTED_TYPES:
2139 self.makeunknown(tarinfo, targetpath)
2140 else:
2141 self.makefile(tarinfo, targetpath)
2142
Martin v. Löwis16f344d2010-11-01 21:39:13 +00002143 if set_attrs:
2144 self.chown(tarinfo, targetpath)
2145 if not tarinfo.issym():
2146 self.chmod(tarinfo, targetpath)
2147 self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002148
2149 #--------------------------------------------------------------------------
2150 # Below are the different file methods. They are called via
2151 # _extract_member() when extract() is called. They can be replaced in a
2152 # subclass to implement other functionality.
2153
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath; an already existing entry is
       silently accepted.
    """
    # Start with an owner-only mode; the real mode is applied later
    # in _extract_member().
    provisional_mode = 0o700
    try:
        os.mkdir(targetpath, provisional_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002163
def makefile(self, tarinfo, targetpath):
    """Write the data of the member tarinfo to a file called targetpath.

       The data is read from self.fileobj starting at
       tarinfo.offset_data. If tarinfo.sparse is not None, it is
       iterated as (offset, size) pairs and each chunk is written at
       its offset. In both cases the file is finally cut or extended
       to tarinfo.size.
    """
    source = self.fileobj
    source.seek(tarinfo.offset_data)
    # The with block closes the target file even if seeking, copying
    # or truncating raises, so a failed extraction does not leak the
    # open file handle.
    with bltn_open(targetpath, "wb") as target:
        if tarinfo.sparse is not None:
            for offset, size in tarinfo.sparse:
                target.seek(offset)
                copyfileobj(source, target, size)
        else:
            copyfileobj(source, target, tarinfo.size)
        target.seek(tarinfo.size)
        target.truncate()
2179
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2187
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath. Raise ExtractError if the os
       module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002195
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at targetpath. Raise
       ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching device type flag.
    type_flag = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    node_mode = tarinfo.mode | type_flag

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, node_mode, device)
2210
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at targetpath. When a hard
       link's target does not exist on disk, or when creating the link
       raises symlink_exception, the referenced archive member is
       extracted to targetpath instead.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        # Linking failed; fall back to extracting the linked member.
        try:
            replacement = self._find_link_target(tarinfo)
            self._extract_member(replacement, targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002233
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo. This
       is only attempted when running with effective uid 0; otherwise
       nothing happens. Raise ExtractError if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Look the names up first and fall back to the numeric ids.
    try:
        group_id = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        group_id = tarinfo.gid
    try:
        user_id = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        user_id = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, user_id, group_id)
        elif sys.platform != "os2emx":
            os.chown(targetpath, user_id, group_id)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002255
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to targetpath. Raise
       ExtractError if that fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002264
def utime(self, tarinfo, targetpath):
    """Set both the access and the modification time of targetpath to
       tarinfo.mtime. Raise ExtractError if that fails.
    """
    if hasattr(os, 'utime'):
        try:
            timestamps = (tarinfo.mtime, tarinfo.mtime)
            os.utime(targetpath, timestamps)
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002274
2275 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member returned from a header read is appended to
       self.members. When no member could be read, self._loaded is
       set to True. A ReadError is raised for header problems at
       offset 0 and for any SubsequentHeaderError.
    """
    self._check("ra")
    # A member stored in self.firstmember is handed out exactly once.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    # The loop only repeats via `continue' (ignore_zeros mode); every
    # other path either raises or falls through to the `break' below.
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            # With ignore_zeros set, skip one block and try again;
            # otherwise leave the loop with tarinfo still None.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # An invalid header at the very start is an error; at
                # any later offset it just ends the reading.
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            # Unlike the cases above, this is raised at any offset.
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing could be read: mark the member list as complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002321
2322 #--------------------------------------------------------------------------
2323 # Little helper methods:
2324
Lars Gustäbel1b512722010-06-03 12:45:16 +00002325 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002326 """Find an archive member by name from bottom to top.
2327 If tarinfo is given, it is used as the starting point.
2328 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002329 # Ensure that all members have been loaded.
2330 members = self.getmembers()
2331
Lars Gustäbel1b512722010-06-03 12:45:16 +00002332 # Limit the member search list up to tarinfo.
2333 if tarinfo is not None:
2334 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002335
Lars Gustäbel1b512722010-06-03 12:45:16 +00002336 if normalize:
2337 name = os.path.normpath(name)
2338
2339 for member in reversed(members):
2340 if normalize:
2341 member_name = os.path.normpath(member.name)
2342 else:
2343 member_name = member.name
2344
2345 if name == member_name:
2346 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002347
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002348 def _load(self):
2349 """Read through the entire archive file and look for readable
2350 members.
2351 """
2352 while True:
2353 tarinfo = self.next()
2354 if tarinfo is None:
2355 break
2356 self._loaded = True
2357
2358 def _check(self, mode=None):
2359 """Check if TarFile is still open, and if the operation's mode
2360 corresponds to TarFile's mode.
2361 """
2362 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +00002363 raise IOError("%s is closed" % self.__class__.__name__)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002364 if mode is not None and self.mode not in mode:
2365 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002366
Lars Gustäbel1b512722010-06-03 12:45:16 +00002367 def _find_link_target(self, tarinfo):
2368 """Find the target member of a symlink or hardlink member in the
2369 archive.
2370 """
2371 if tarinfo.issym():
2372 # Always search the entire archive.
Lars Gustäbel1ef9eda2012-04-24 21:04:40 +02002373 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
Lars Gustäbel1b512722010-06-03 12:45:16 +00002374 limit = None
2375 else:
2376 # Search the archive before the link, because a hard link is
2377 # just a reference to an already archived file.
2378 linkname = tarinfo.linkname
2379 limit = tarinfo
2380
2381 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2382 if member is None:
2383 raise KeyError("linkname %r not found" % linkname)
2384 return member
2385
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002386 def __iter__(self):
2387 """Provide an iterator object.
2388 """
2389 if self._loaded:
2390 return iter(self.members)
2391 else:
2392 return TarIter(self)
2393
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002394 def _dbg(self, level, msg):
2395 """Write debugging output to sys.stderr.
2396 """
2397 if level <= self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002398 print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002399
def __enter__(self):
    """Enter a `with' block: verify through _check() that the TarFile
       is still open and return the TarFile itself.
    """
    self._check()
    return self
2403
2404 def __exit__(self, type, value, traceback):
2405 if type is None:
2406 self.close()
2407 else:
2408 # An exception occurred. We must not call close() because
2409 # it would try to write end-of-archive blocks and padding.
2410 if not self._extfileobj:
2411 self.fileobj.close()
2412 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002413# class TarFile
2414
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator that starts at the first member of
           tarfile.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def __next__(self):
        """Return the next member. While the TarFile is not fully
           loaded, members come from its next() method; when that is
           exhausted the TarFile is flagged as _loaded.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: getmembers() may have been called
            # during iteration, so everything is already loaded and
            # the member list is indexed directly. Otherwise the
            # iteration would stop prematurely.
            try:
                item = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            item = archive.next()
            if not item:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return item
2450
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002451#--------------------
2452# exported functions
2453#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    try:
        # Opening and closing the archive without a TarError is all
        # that is checked.
        open(name).close()
    except TarError:
        return False
    return True
2463 return False
2464
# Keep the built-in open() reachable as bltn_open, because the
# module-level name `open' is rebound to TarFile.open right below.
bltn_open = open
open = TarFile.open