blob: aca934a9b7b2715e39e1a5871170a124e5436762 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata. The "$...$" values are version-control keyword
# placeholders and are kept verbatim.
__version__ = "$Revision$"
__date__ = "$Date$"
__cvsid__ = "$Id$"

version = "0.9.0"

__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000039
40#---------
41# Imports
42#---------
43import sys
44import os
45import shutil
46import stat
47import errno
48import time
49import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000050import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000051import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000052
53try:
54 import grp, pwd
55except ImportError:
56 grp = pwd = None
57
58# from tarfile import *
59__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
60
Georg Brandl1a3284e2007-12-02 09:40:06 +000061from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000062
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000063#---------------------------------------------------------
64# tar constants
65#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic strings span the 6-byte "magic" and the 2-byte "version"
# header fields, so each of them must be exactly 8 bytes long.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string: "ustar", 2 spaces, NUL
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string: "ustar", NUL, "00"

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one-byte type flag in a member header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive formats that can be selected when writing.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000098
99#---------------------------------------------------------
100# tarfile constants
101#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, mapped to the callable used
# to convert them; all other fields are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
134
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000135#---------------------------------------------------------
136# Bits used in the mode field, values in octal.
137#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special permission bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Owner permission bits.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner

# Group permission bits.
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group

# Permission bits for everybody else.
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000158
159#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000160# initialization
161#---------------------------------------------------------
# Default encoding for names in archive headers: fixed to UTF-8 when
# os.name is "nt" or "ce", the filesystem encoding everywhere else.
ENCODING = (
    "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000166
167#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000168# Some useful functions
169#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000170
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes; a shorter value is
    padded on the right with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000176
def nts(s, encoding, errors):
    """Decode a bytes field, ignoring everything from the first NUL on.

    If s contains no null byte the whole field is decoded.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000184
def nti(s):
    """Convert a number field to a python number.

    s is the raw bytes of a header number field. Two encodings are
    understood (see itn()):

    * a NUL-terminated string of octal digits (an empty string counts
      as 0);
    * the GNU extension: a leading 0o200 byte followed by a big-endian
      base-256 representation of the value.

    Raises InvalidHeaderError if an octal field cannot be parsed.
    """
    # Indexing bytes yields an int, so the marker has to be compared
    # against the integer 0o200, not against a one-character string.
    if s[0] != 0o200:
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            # Also covers UnicodeDecodeError, a ValueError subclass.
            raise InvalidHeaderError("invalid header")
    else:
        n = 0
        for byte in s[1:]:
            n = (n << 8) + byte
    return n
201
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode the python number n as a header number field.

    The result is digits bytes long. Raises ValueError if n does not
    fit into the field for the given format.
    """
    # POSIX 1003.1-1988 stores a number as octal digits terminated by a
    # null byte, which covers values up to (8**(digits-1))-1. GNU tar
    # additionally allows bigger values: a leading 0o200 byte marks
    # this encoding and the remaining digits-1 bytes hold a big-endian
    # representation, covering values up to (256**(digits-1))-1.
    width = digits - 1

    if 0 <= n < 8 ** width:
        return ("%0*o" % (width, n)).encode("ascii") + NUL

    if not (format == GNU_FORMAT and n < 256 ** width):
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, add the marker byte,
    # then flip the whole field into big-endian order.
    field = bytearray()
    for _ in range(width):
        field.append(n & 0o377)
        n >>= 8
    field.append(0o200)
    field.reverse()
    return field
228
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a header block.

    All bytes of the 512-byte header are summed up, except for the
    eight bytes of the chksum field (offsets 148-155), which count as
    if they were filled with spaces (8 * 32 == 256). According to the
    GNU tar sources, some tars (Sun and NeXT) sum the bytes as signed
    chars, which gives a different result when bytes with the high bit
    set are present, so both variants are computed.
    """
    # "8x" skips the chksum field between the two summed regions.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000241
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None the entire remaining content is copied; with
    length 0 nothing is done. Raises IOError if src runs out of data
    before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes, failing on a short read.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)
    for _ in range(full_chunks):
        transfer(chunk_size)
    if tail:
        transfer(tail)
266
# One inner tuple per character of the mode string. Within a tuple the
# (bits, char) pairs are tried in order and the first pair whose bits
# are all set in the mode wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)


def filemode(mode):
    """Render a file's mode as a string of the form -rwxrwxrwx.

    Used by TarFile.list()
    """
    return "".join(
        # First matching entry of each group, "-" if none matches.
        next((char for bits, char in group if mode & bits == bits), "-")
        for group in filemode_table
    )
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000308
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000342
343#---------------------------
344# internal stream interface
345#---------------------------
class _LowLevelFile:
    """Minimal file object on top of an os-level file descriptor.

    Supports reading and writing. It is used instead of a regular
    file object for streaming access.
    """

    def __init__(self, name, mode):
        # Only "r" and "w" are known; any other mode raises KeyError.
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is added on platforms that define it.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
369
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       comptype is "tar" (no compression), "gz", "bz2" or "*"
       (detect the compression from the first block when reading).

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           If fileobj is None, the file `name' is opened with a
           _LowLevelFile and is owned (and later closed) by the stream.
           Raises CompressionError if the module needed for comptype
           is not available.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False
        # Exceptions from the decompressor that _read() reports as
        # ReadError.
        self.exception = (IOError,)

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                # zlib signals corrupt data with zlib.error, which is
                # not an IOError.
                self.exception = (IOError, zlib.error)
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except BaseException:
            # Do not leak a file that was opened here, and keep
            # __del__() from closing a half-initialized stream.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__() failed early.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and
        # trailer are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write bytes s to the stream, compressing them if the
           stream is compressed.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Append s to the internal buffer and write out whole
           blocks of bufsize bytes as long as more than bufsize
           bytes are buffered.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            # Even if flushing fails the stream counts as closed and a
            # file opened by __init__() is released.
            self.closed = True
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Parses and skips the gzip header. Raises ReadError if the
           magic number is wrong and CompressionError if the
           compression method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        # Skip mtime (4 bytes), extra flags and OS.
        self.__read(6)

        if flag & 4:
            # FEXTRA: the field is part of the uncompressed header, so
            # it must be skipped with the raw __read(), not with
            # read(), which would feed it to the decompressor.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 16-bit header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden (raises StreamError).
        """
        if pos - self.pos >= 0:
            # Seeking forward is done by reading and discarding data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes, so they need a bytes separator.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream, decompressing them if
           the stream is compressed. Raises ReadError if the
           compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
595
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first block of the file is read ahead on construction so
       that getcomptype() can inspect it; the first read() call hands
       that block out again.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # The first call returns the block read in __init__(),
        # regardless of size. The instance attribute assigned here
        # shadows this method, so all later calls go straight to the
        # wrapped file object.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar", depending on the magic bytes
           at the start of the first block.
        """
        if self.buf.startswith(b"\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit for the block
        # size ("1" to "9") and the block magic "1AY&SY". The digit is
        # skipped so that every compression level is recognized.
        if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
619
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched per read from the file object.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start at uncompressed position 0. In read mode the
           file object is rewound and a fresh decompressor is created.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = b""

    def read(self, size):
        """Return up to size uncompressed bytes."""
        # Decompress more input until enough data is buffered or the
        # file object is exhausted.
        while len(self.buf) < size:
            compressed = self.fileobj.read(self.blocksize)
            if not compressed:
                break
            self.buf += self.bz2obj.decompress(compressed)

        result, self.buf = self.buf[:size], self.buf[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to uncompressed position pos."""
        # Going backwards means starting over from the beginning; the
        # rest of the distance is covered by reading and discarding.
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        return self.pos

    def write(self, data):
        self.pos += len(data)
        self.fileobj.write(self.bz2obj.compress(data))

    def close(self):
        # Only the compressor is flushed; the file object itself is
        # not closed here.
        if self.mode == "w":
            self.fileobj.write(self.bz2obj.flush())
# class _BZ2Proxy
679
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000680#------------------------
681# Extraction file object
682#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.

       The part starts at `offset' in the underlying file object and
       is `size' bytes long. If `sparse' is given, reads are resolved
       through its find() method (see readsparsesection()).
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def seekable(self):
        if not hasattr(self.fileobj, "seekable"):
            # XXX gzip.GzipFile and bz2.BZ2File
            return True
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.

           At most size bytes are returned; with size None or negative
           everything up to the end of the part is returned. Nothing
           is returned once the position is at or past the end.
        """
        # Never let the length go negative: a negative length would
        # make the underlying read() run past the end of the part and
        # would move the position backwards.
        remaining = max(0, self.size - self.position)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = b""
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data += buf
        return data

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)

        if section is None:
            return b""

        # Do not read beyond the end of the current section.
        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            # Data section: translate the position inside the sparse
            # file into a position inside the stored data.
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            # Any other section is returned as NUL bytes.
            self.position += size
            return NUL * size
#class _FileInFile
763
764
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       The member's bytes are served by a _FileInFile object. This class
       adds a read-ahead buffer (used by readline()) and keeps track of
       the logical position inside the member.
    """
    # Number of bytes readline() requests at a time while it is
    # looking for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by tarinfo inside
           the opened archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   tarinfo.sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = b""

    def readable(self):
        """Return True, the object can always be read from."""
        return True

    def writable(self):
        """Return False, the object cannot be written to."""
        return False

    def seekable(self):
        """Return whether the underlying file object is seekable."""
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "everything" (the usual io convention).
        # Without this, buffer[:size] would cut bytes off the end of the
        # buffered data and a negative count would be passed on.
        if size is not None and size < 0:
            size = None

        buf = b""
        if self.buffer:
            # Serve data that readline() has already read ahead first.
            if size is None:
                buf = self.buffer
                self.buffer = b""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. A size of None or
           any negative value means no limit.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # No newline buffered yet, read ahead block by block until
            # one shows up or the data is exhausted.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # No newline found, the rest is the last line.
                        pos = len(self.buffer)
                    break

        # Only a real, non-negative limit may shorten the line.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

           Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file and return the new position.

           The resulting position is clamped to the range 0..self.size.
           Raises ValueError if the file is closed or whence is not one
           of os.SEEK_SET, os.SEEK_CUR and os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data belongs to the old position, drop it.
        self.buffer = b""
        self.fileobj.seek(self.position)
        return self.position

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000901#class ExFileObject
902
903#------------------
904# Exported Classes
905#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    # Instances only carry the attributes listed here. The public names
    # mirror the header fields that __init__() initializes;
    # "_sparse_structs" is temporary state that frombuf() stores and
    # _proc_sparse() consumes and deletes again.
    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")
919
def __init__(self, name=""):
    """Create a TarInfo object with default field values.

       name is the optional name of the member.
    """
    # What the member is called and what kind of member it is.
    self.name = name            # member name
    self.linkname = ""          # link name
    self.type = REGTYPE         # member type

    # Permissions and ownership.
    self.mode = 0o644           # file permissions
    self.uid = self.gid = 0     # user id / group id
    self.uname = self.gname = "root"    # user name / group name

    # Size, modification time and header checksum.
    self.size = self.mtime = self.chksum = 0

    # Device major and minor number.
    self.devmajor = self.devminor = 0

    # Where the tar header starts and where the file's data starts.
    self.offset = self.offset_data = 0

    self.sparse = None          # sparse member information
    self.pax_headers = {}       # pax header information
943
# Pax headers know the "name" and "linkname" fields under the keywords
# "path" and "linkpath". The two properties below expose the same
# attributes under those alternative names.
def _getpath(self):
    return self.name

def _setpath(self, name):
    self.name = name

path = property(fget=_getpath, fset=_setpath)

def _getlinkpath(self):
    return self.linkname

def _setlinkpath(self, linkname):
    self.linkname = linkname

linkpath = property(fget=_getlinkpath, fset=_setlinkpath)
957
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000958 def __repr__(self):
959 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
960
def get_info(self):
    """Return the TarInfo's attributes as a dictionary.

       The mode is reduced to its lowest twelve bits (0o7777) and the
       name of a directory member always ends with a slash.
    """
    info = dict(
        name=self.name,
        mode=self.mode & 0o7777,
        uid=self.uid,
        gid=self.gid,
        size=self.size,
        mtime=self.mtime,
        chksum=self.chksum,
        type=self.type,
        linkname=self.linkname,
        uname=self.uname,
        gname=self.gname,
        devmajor=self.devmajor,
        devminor=self.devminor,
    )

    is_directory = info["type"] == DIRTYPE
    if is_directory and not info["name"].endswith("/"):
        info["name"] += "/"

    return info
984
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Return a tar header as a string of 512 byte blocks.

       format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
       PAX_FORMAT); any other value raises ValueError. encoding and
       errors are passed on to the header creation method, except that
       the pax variant does not take errors.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding)

    raise ValueError("invalid format")
998
def create_ustar_header(self, info, encoding, errors):
    """Return the object as a ustar header block.

       info is the dictionary from get_info(); it is modified in
       place. Raises ValueError if the linkname is longer than
       LENGTH_LINK or if the name cannot be split into prefix and name.
    """
    info["magic"] = POSIX_MAGIC

    if len(info["linkname"]) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    fullname = info["name"]
    if len(fullname) > LENGTH_NAME:
        # Too long for the name field alone, spread it over the
        # prefix and the name field.
        prefix, shortname = self._posix_split_name(fullname)
        info["prefix"] = prefix
        info["name"] = shortname

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001011
def create_gnu_header(self, info, encoding, errors):
    """Return the object as a GNU header block sequence.

       A linkname longer than LENGTH_LINK and a name longer than
       LENGTH_NAME are each stored in an extra long-name sequence that
       is put in front of the actual header block.
    """
    info["magic"] = GNU_MAGIC

    long_fields = (
        ("linkname", GNUTYPE_LONGLINK, LENGTH_LINK),
        ("name", GNUTYPE_LONGNAME, LENGTH_NAME),
    )

    blocks = []
    for field, longtype, limit in long_fields:
        if len(info[field]) > limit:
            blocks.append(self._create_gnu_long_header(info[field], longtype, encoding, errors))

    blocks.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(blocks)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001025
def create_pax_header(self, info, encoding):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplement information.

       info is the dictionary from get_info(); number fields that are
       moved into the pax header are set to 0 in it.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header if they are not pure ASCII
    # or if they are longer than the field in the header block.
    string_fields = (
        ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32), ("gname", "gname", 32))

    for field, keyword, limit in string_fields:
        if keyword in pax_headers:
            # The pax header has priority.
            continue

        text = info[field]
        try:
            text.encode("ascii", "strict")
            representable = len(text) <= limit
        except UnicodeEncodeError:
            representable = False

        if not representable:
            pax_headers[keyword] = text

    # Number fields go into the pax header if they are negative, too
    # big for the octal field or if they are floats.
    for field, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
        if field not in pax_headers:
            val = info[field]
            in_range = 0 <= val < 8 ** (digits - 1)
            if in_range and not isinstance(val, float):
                continue
            pax_headers[field] = str(val)

        # The value lives in the pax header (it has priority), keep
        # the header block field from overflowing.
        info[field] = 0

    # Create a pax extended header if necessary.
    extended = b""
    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)

    return extended + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001074
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return the object as a pax global header block sequence.

       pax_headers holds the keyword, value pairs to store; the
       generic header is created with type XGLTYPE and "utf8".
    """
    global_header = cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
    return global_header
Guido van Rossumd8faa362007-04-27 19:54:29 +00001080
def _posix_split_name(self, name):
    """Split a name longer than 100 chars into a prefix
       and a name part.

       The split is made at the last "/" within the first
       LENGTH_PREFIX + 1 characters. Raises ValueError if there is no
       such slash, if the prefix would be empty or if the remaining
       name is longer than LENGTH_NAME.
    """
    # Index right behind the last usable slash, 0 if there is none.
    cut = name.rfind("/", 0, LENGTH_PREFIX + 1) + 1

    # The slash itself belongs to neither part.
    prefix = name[:cut][:-1]
    name = name[cut:]

    if len(name) > LENGTH_NAME or not prefix:
        raise ValueError("name is too long")
    return prefix, name
1095
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       String fields are converted with stn() using encoding and
       errors, number fields with itn() using format. Missing keys
       fall back to the defaults given below. The result is padded to
       BLOCKSIZE bytes and carries the checksum of the block.
    """
    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum field. It must take up exactly 8 bytes: frombuf()
        # reads the checksum from buf[148:156] and the type from
        # buf[156:157], and the checksum is patched in below at a fixed
        # offset. A shorter placeholder would shift every later field.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", "root"), 32, encoding, errors),
        stn(info.get("gname", "root"), 32, encoding, errors),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # Pad the joined fields with NUL bytes up to a full block.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    # The checksum is calculated while the field holds spaces ...
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # ... and then written over the first 7 bytes of the field as six
    # octal digits plus a NUL; the eighth byte stays a space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
1123
@staticmethod
def _create_payload(payload):
    """Return the string payload filled with zero bytes
       up to the next 512 byte border.

       A payload whose length already is a multiple of BLOCKSIZE is
       returned as it is.
    """
    remainder = len(payload) % BLOCKSIZE
    if remainder > 0:
        padding = (BLOCKSIZE - remainder) * NUL
        payload += padding
    return payload
1133
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       for name.

       The sequence is a header block named "././@LongLink" followed
       by the encoded, NUL-terminated name padded to full blocks.
    """
    payload = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(payload),
        "magic": GNU_MAGIC,
    }

    # Extended header first, then the blocks that hold the name.
    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(payload)
1150
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Return a POSIX.1-2008 extended or global header sequence
       that contains a list of keyword, value pairs. The values
       must be strings.

       type is the header type to store, encoding is only used for
       the values if hdrcharset=BINARY has to be written.
    """
    # A value that cannot be encoded as strict UTF-8 (it contains
    # surrogate characters) forces hdrcharset=BINARY, see _proc_pax()
    # for more information.
    binary = False
    for value in pax_headers.values():
        try:
            value.encode("utf8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    chunks = []
    if binary:
        # The hdrcharset field goes to the beginning of the header.
        chunks.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        raw_keyword = keyword.encode("utf8")
        if binary:
            # Try to restore the original byte representation of
            # `value'. Needless to say, that the encoding must match
            # the string.
            raw_value = value.encode(encoding, "surrogateescape")
        else:
            raw_value = value.encode("utf8")

        # A record starts with its own total length, so the number of
        # digits of that length is part of the length. Iterate until
        # the value no longer changes.
        base = len(raw_keyword) + len(raw_value) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate

        chunks.append(bytes(str(total), "ascii") + b" " + raw_keyword + b"=" + raw_value + b"\n")

    records = b"".join(chunks)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block first, then the record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1201
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Construct a TarInfo object from a 512 byte bytes object.

       String fields are decoded with nts() using encoding and errors,
       number fields are converted with nti().

       Raises EmptyHeaderError for an empty buffer, TruncatedHeaderError
       if the buffer is not BLOCKSIZE bytes long, EOFHeaderError if it
       consists of NUL bytes only and InvalidHeaderError if the stored
       checksum does not match.
    """
    buflen = len(buf)
    if buflen == 0:
        raise EmptyHeaderError("empty header")
    if buflen != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    obj = cls()
    obj.name = nts(buf[0:100], encoding, errors)
    obj.mode = nti(buf[100:108])
    obj.uid = nti(buf[108:116])
    obj.gid = nti(buf[116:124])
    obj.size = nti(buf[124:136])
    obj.mtime = nti(buf[136:148])
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = nts(buf[157:257], encoding, errors)
    obj.uname = nts(buf[265:297], encoding, errors)
    obj.gname = nts(buf[297:329], encoding, errors)
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500], encoding, errors)

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # The old GNU sparse format occupies some of the unused
    # space in the buffer for up to 4 sparse structures.
    # Save them for later processing in _proc_sparse().
    if obj.type == GNUTYPE_SPARSE:
        structs = []
        # Each structure is 24 bytes long: 12 bytes offset and
        # 12 bytes number of bytes, the first one starts at byte 386.
        for pos in range(386, 386 + 4 * 24, 24):
            try:
                entry = (nti(buf[pos:pos + 12]), nti(buf[pos + 12:pos + 24]))
            except ValueError:
                break
            structs.append(entry)
        obj._sparse_structs = (structs, bool(buf[482]), nti(buf[483:495]))

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1264
@classmethod
def fromtarfile(cls, tarfile):
    """Return the next TarInfo object from TarFile object
       tarfile.

       One block is read from tarfile.fileobj and parsed with
       frombuf(); the result of its _proc_member() is returned.
    """
    fileobj = tarfile.fileobj
    header = fileobj.read(BLOCKSIZE)
    obj = cls.frombuf(header, tarfile.encoding, tarfile.errors)
    # The header block has just been read, it starts one block back.
    obj.offset = fileobj.tell() - BLOCKSIZE
    return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001274
Guido van Rossumd8faa362007-04-27 19:54:29 +00001275 #--------------------------------------------------------------------------
1276 # The following are methods that are called depending on the type of a
1277 # member. The entry point is _proc_member() which can be overridden in a
1278 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1279 # implement the following
1280 # operations:
1281 # 1. Set self.offset_data to the position where the data blocks begin,
1282 # if there is data that follows.
1283 # 2. Set tarfile.offset to the position where the next member's header will
1284 # begin.
1285 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Choose the right processing method depending on
       the type and call it.

       GNU longname/longlink, GNU sparse and pax header members have
       their own method, everything else goes to _proc_builtin().
    """
    membertype = self.type
    if membertype in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        process = self._proc_gnulong
    elif membertype == GNUTYPE_SPARSE:
        process = self._proc_sparse
    elif membertype in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        process = self._proc_pax
    else:
        process = self._proc_builtin
    return process(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001298
Guido van Rossumd8faa362007-04-27 19:54:29 +00001299 def _proc_builtin(self, tarfile):
1300 """Process a builtin type or an unknown type which
1301 will be treated as a regular file.
1302 """
1303 self.offset_data = tarfile.fileobj.tell()
1304 offset = self.offset_data
1305 if self.isreg() or self.type not in SUPPORTED_TYPES:
1306 # Skip the following data blocks.
1307 offset += self._block(self.size)
1308 tarfile.offset = offset
Thomas Wouters89f507f2006-12-13 04:49:30 +00001309
Guido van Rossume7ba4952007-06-06 23:52:48 +00001310 # Patch the TarInfo object with saved global
Guido van Rossumd8faa362007-04-27 19:54:29 +00001311 # header information.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001312 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001313
1314 return self
1315
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.

       Returns the TarInfo object of the header that follows, with its
       name or linkname replaced by the long one. Raises
       SubsequentHeaderError if that header cannot be read.
    """
    longname = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information. It takes over our offset, too.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(longname, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(longname, tarfile.encoding, tarfile.errors)

    return member
1337
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers.

       Builds self.sparse from the sparse structures, sets
       self.offset_data and tarfile.offset, replaces self.size with
       the original size of the file and returns self.
    """
    # frombuf() has already collected the structures of the header.
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Extended header blocks hold up to 21 more structures each,
    # 24 bytes per structure.
    while isextended:
        block = tarfile.fileobj.read(BLOCKSIZE)
        for pos in range(0, 21 * 24, 24):
            try:
                entry = (nti(block[pos:pos + 12]), nti(block[pos + 12:pos + 24]))
            except ValueError:
                break
            structs.append(entry)
        isextended = bool(block[504])

    # Transform the sparse structures to something we can use
    # in ExFileObject: alternating holes and data sections.
    self.sparse = _ringbuffer()
    add_section = self.sparse.append
    lastpos = realpos = 0
    for offset, numbytes in structs:
        if offset > lastpos:
            add_section(_hole(lastpos, offset - lastpos))
        add_section(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
    if lastpos < origsize:
        # The file ends with a hole.
        add_section(_hole(lastpos, origsize - lastpos))

    self.offset_data = tarfile.fileobj.tell()
    # self.size still is the size of the stored data at this point.
    tarfile.offset = self.offset_data + self._block(self.size)
    self.size = origsize

    return self
1378
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2008.

       The records of the header are parsed into a dictionary. For a
       global header this is tarfile.pax_headers itself, otherwise a
       copy of it. The TarInfo object of the header that follows is
       returned; for an extended header it is patched with the parsed
       information.

       Raises InvalidHeaderError if a record claims a length of zero
       and SubsequentHeaderError if the following header cannot be
       read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Check if the pax header contains a hdrcharset field. This tells us
    # the encoding of the path, linkpath, uname and gname fields. Normally,
    # these fields are UTF-8 encoded but since POSIX.1-2008 tar
    # implementations are allowed to store them as raw binary strings if
    # the translation to UTF-8 fails.
    match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if match is not None:
        pax_headers["hdrcharset"] = match.group(1).decode("utf8")

    # For the time being, we don't care about anything other than "BINARY".
    # The only other value that is currently allowed by the standard is
    # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
    hdrcharset = pax_headers.get("hdrcharset")
    if hdrcharset == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf8"

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A record of length zero would never advance pos, so the
            # same record would be matched over and over again. Treat
            # it as a corrupt header instead of looping forever.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        # Normally, we could just use "utf8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(keyword, "utf8", "utf8",
                tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(value, encoding, tarfile.encoding,
                    tarfile.errors)
        else:
            value = self._decode_pax_field(value, "utf8", "utf8",
                    tarfile.errors)

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1467
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite attributes of this object with the records found in
       `pax_headers', which come from a preceding pax extended or
       global header. Only keywords listed in PAX_FIELDS are applied;
       a copy of the complete mapping is stored in self.pax_headers.
       `encoding' and `errors' are accepted but not used here.
    """
    for field in pax_headers:
        if field not in PAX_FIELDS:
            # Not an attribute we patch; it is still kept in the copy
            # stored below.
            continue

        data = pax_headers[field]

        # A path is stored without trailing slashes.
        if field == "path":
            data = data.rstrip("/")

        if field in PAX_NUMBER_FIELDS:
            convert = PAX_NUMBER_FIELDS[field]
            try:
                data = convert(data)
            except ValueError:
                # A malformed number is replaced by 0.
                data = 0

        setattr(self, field, data)

    self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001488
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001489 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1490 """Decode a single field from a pax record.
1491 """
1492 try:
1493 return value.decode(encoding, "strict")
1494 except UnicodeDecodeError:
1495 return value.decode(fallback_encoding, fallback_errors)
1496
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    full_blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partially filled block still occupies a whole block.
        full_blocks += 1
    return full_blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001505
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES.
    """
    member_type = self.type
    return member_type in REGULAR_TYPES
def isfile(self):
    """Alias for isreg(): return whatever isreg() returns.
    """
    regular = self.isreg()
    return regular
def isdir(self):
    """Return True if the member's type equals DIRTYPE.
    """
    member_type = self.type
    return member_type == DIRTYPE
def issym(self):
    """Return True if the member's type equals SYMTYPE.
    """
    member_type = self.type
    return member_type == SYMTYPE
def islnk(self):
    """Return True if the member's type equals LNKTYPE.
    """
    member_type = self.type
    return member_type == LNKTYPE
def ischr(self):
    """Return True if the member's type equals CHRTYPE.
    """
    member_type = self.type
    return member_type == CHRTYPE
def isblk(self):
    """Return True if the member's type equals BLKTYPE.
    """
    member_type = self.type
    return member_type == BLKTYPE
def isfifo(self):
    """Return True if the member's type equals FIFOTYPE.
    """
    member_type = self.type
    return member_type == FIFOTYPE
def issparse(self):
    """Return True if the member's type equals GNUTYPE_SPARSE.
    """
    member_type = self.type
    return member_type == GNUTYPE_SPARSE
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or
       FIFOTYPE.
    """
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1526# class TarInfo
1527
1528class TarFile(object):
1529 """The TarFile Class provides an interface to tar archives.
1530 """
1531
1532 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1533
1534 dereference = False # If true, add content of linked file to the
1535 # tar file, else the link.
1536
1537 ignore_zeros = False # If true, skips empty or invalid blocks and
1538 # continues processing.
1539
Lars Gustäbel365aff32009-12-13 11:42:29 +00001540 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001541 # messages (if debug >= 0). If > 0, errors
1542 # are passed to the caller as exceptions.
1543
Guido van Rossumd8faa362007-04-27 19:54:29 +00001544 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001545
Guido van Rossume7ba4952007-06-06 23:52:48 +00001546 encoding = ENCODING # Encoding for 8-bit character strings.
1547
1548 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001549
Guido van Rossumd8faa362007-04-27 19:54:29 +00001550 tarinfo = TarInfo # The default TarInfo class to use.
1551
1552 fileobject = ExFileObject # The default ExFileObject class to use.
1553
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining arguments override the class attributes of the same
       name when they are not None; `errors' is always stored.
       `pax_headers' is only used if the format is PAX_FORMAT.

       Raises ValueError for an invalid `mode' and ReadError if an
       archive opened for appending contains a bad header.
    """
    # Validate by exact membership. A substring test against "raw"
    # would accept the empty string, which then failed with a KeyError
    # in the lookup below instead of the documented ValueError.
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist before next() is called.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately catches everything: the file we opened ourselves
        # must be released whatever went wrong, then the error is
        # re-raised unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001649
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001650 #--------------------------------------------------------------------------
1651 # Below are the classmethods which act as alternate constructors to the
1652 # TarFile class. The open() method is the only one that is needed for
1653 # public use; it is the "super"-constructor and is able to select an
1654 # adequate "sub"-constructor for a particular compression using the mapping
1655 # from OPEN_METH.
1656 #
1657 # This concept allows one to subclass TarFile without losing the comfort of
1658 # the super-constructor. A sub-constructor is registered and made available
1659 # by adding it to the mapping in OPEN_METH.
1660
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or
       the mode cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener accepts
       the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                # Remember the position so that the next opener
                # starts from the same place.
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: a substring test against "rw" would let
        # the two-character mode "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # The constructor treats the stream as an external file
            # object and leaves it open, so release it here before
            # re-raising the error unchanged.
            stream.close()
            raise
        t._extfileobj = False
        return t

    # Exact comparison: a substring test against "aw" would also
    # match "" and "aw".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001733
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.
       `mode' must be 'r', 'a' or 'w', otherwise ValueError is raised.
       All other arguments are passed on to the class constructor.
    """
    # Exact membership: a substring test against "raw" would accept
    # the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001741
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is not usable and ReadError
       if opening the archive fails with an IOError.
    """
    # Exact membership: a substring test against "rw" would accept
    # the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the caller supplied the file object; only a file
    # opened here may be closed here.
    extfileobj = fileobj is not None
    if not extfileobj:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if not extfileobj:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure must not leak the file opened above either.
        if not extfileobj:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1767
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module cannot be imported and
       ReadError if opening the archive fails with IOError or EOFError.
    """
    # Exact membership: a substring test against "rw" would accept
    # the empty string.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here is owned by this method and may be
    # closed by it on failure.
    extfileobj = fileobj is not None
    if extfileobj:
        fileobj = _BZ2Proxy(fileobj, mode)
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        if not extfileobj:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Any other failure must not leak the file opened above either.
        if not extfileobj:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1792
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001793 # All *open() methods are registered here.
# Maps a compression type to the name of the classmethod that opens it.
OPEN_METH = dict(
    tar="taropen",      # uncompressed tar
    gz="gzopen",        # gzip compressed tar
    bz2="bz2open",      # bzip2 compressed tar
)
1799
1800 #--------------------------------------------------------------------------
1801 # The public methods which TarFile provides:
1802
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive. Calling close() on an already closed
       TarFile does nothing.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release a file object we own even if writing the trailer
        # failed; otherwise it would stay open.
        if not self._extfileobj:
            self.fileobj.close()
1822
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised if there is no such member. If a member
       occurs more than once in the archive, its last occurrence is
       assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001833
def getmembers(self):
    """Return the list of TarInfo objects for all members, in the
       order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The archive has to be scanned completely before the list of
    # members is known.
    self._load()
    return self.members
1843
def getnames(self):
    """Return the names of all members as a list, in the same order
       as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001849
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing either the file `name' or the
       file object `fileobj' (via os.fstat on its file descriptor). The
       returned object may be modified before it is passed to
       addfile(). `arcname', if given, is the name the file gets in the
       archive. None is returned for file types that are not handled.
    """
    self._check("aw")

    # A file object overrides `name' with its own name.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: drop a drive prefix, use forward
    # slashes and make absolute paths relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Stat the file descriptor if there is one; otherwise use lstat
    # where available unless links are to be dereferenced.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        is_hardlink = (not self.dereference and statres.st_nlink > 1 and
                       inode in self.inodes and
                       arcname != self.inodes[inode])
        if is_hardlink:
            # The same inode was already archived under another name.
            kind = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # Remember the inode only if it is valid.
            # For win32 it is always 0.
            kind = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        kind = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        kind = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        kind = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        kind = CHRTYPE
    elif stat.S_ISBLK(stmd):
        kind = BLKTYPE
    else:
        return None

    # Copy everything the stat result provides into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if kind == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if kind in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1947
def list(self, verbose=True):
    """Print a table of contents to sys.stdout, one line per member.
       With `verbose' set to False only the member names are printed;
       with True an `ls -l'-like line is produced.
    """
    self._check()

    for member in self:
        if verbose:
            print(filemode(member.mode), end=' ')
            owner = member.uname or member.uid
            group = member.gname or member.gid
            print("%s/%s" % (owner, group), end=' ')
            if member.ischr() or member.isblk():
                # Devices show major,minor in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            stamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        # Directories are marked with a trailing slash.
        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001976
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded; it is deprecated in
       favour of `filter'. `filter' is a function that expects a TarInfo
       object argument and returns the changed TarInfo object, if it
       returns None the TarInfo object will be excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # The with-statement closes the source file even if addfile()
        # raises.
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for entry in os.listdir(name):
                self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                        recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
2038
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive. If
       `fileobj' is given, tarinfo.size bytes are read from it and
       stored as the member's data. TarInfo objects can be created
       with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object is not the one stored
    # in the member list.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad it with NULs up to
    # a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        padding = -tarinfo.size % BLOCKSIZE
        if padding > 0:
            self.fileobj.write(NUL * padding)
        self.offset += tarinfo.size + padding

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002064
def extractall(self, path=".", members=None):
    """Extract every member of the archive below `path' (the current
       working directory by default). Owner, modification time and
       permissions of directories are set only after all members have
       been extracted. `members' is optional and must be a subset of
       the list returned by getmembers().

       NOTE(review): member names are joined onto `path' as they are;
       names that are absolute or contain ".." are presumably not
       confined to `path' -- confirm before extracting archives from
       untrusted sources.
    """
    pending_dirs = []

    if members is None:
        members = self

    for member in members:
        if member.isdir():
            # Create the directory with a safe mode first; the real
            # attributes are applied at the end from the original
            # TarInfo object.
            pending_dirs.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        self.extract(member, path)

    # Walk the directories in reverse sorted order of their names and
    # set the correct owner, mtime and filemode.
    for member in reversed(sorted(pending_dirs, key=lambda a: a.name)):
        dirpath = os.path.join(path, member.name)
        try:
            self.chown(member, dirpath)
            self.utime(member, dirpath)
            self.chmod(member, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2101
def extract(self, member, path=""):
    """Extract one member of the archive below `path' (the current
       working directory by default), using its full name. `member'
       may be a filename or a TarInfo object. Depending on errorlevel,
       errors are either re-raised or only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    target = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, target)
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2134
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may
       be a filename or a TarInfo object. For a regular file, or a
       member of a type not in SUPPORTED_TYPES, a file-like object is
       returned. For a link, the file-like object of the link's target
       is returned. For anything else None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # A member whose type is unknown is treated as a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        return self.extractfile(self._find_link_target(tarinfo))

    # There is no data associated with the member (directory, chrdev,
    # blkdev, etc.).
    return None
2172
2173 def _extract_member(self, tarinfo, targetpath):
2174 """Extract the TarInfo object tarinfo to a physical
2175 file called targetpath.
2176 """
2177 # Fetch the TarInfo object for the given name
2178 # and build the destination pathname, replacing
2179 # forward slashes to platform specific separators.
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00002180 targetpath = targetpath.rstrip("/")
2181 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002182
2183 # Create all upper directories.
2184 upperdirs = os.path.dirname(targetpath)
2185 if upperdirs and not os.path.exists(upperdirs):
Christian Heimes2202f872008-02-06 14:31:34 +00002186 # Create directories that are not part of the archive with
2187 # default permissions.
Thomas Woutersb2137042007-02-01 18:02:27 +00002188 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002189
2190 if tarinfo.islnk() or tarinfo.issym():
2191 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2192 else:
2193 self._dbg(1, tarinfo.name)
2194
2195 if tarinfo.isreg():
2196 self.makefile(tarinfo, targetpath)
2197 elif tarinfo.isdir():
2198 self.makedir(tarinfo, targetpath)
2199 elif tarinfo.isfifo():
2200 self.makefifo(tarinfo, targetpath)
2201 elif tarinfo.ischr() or tarinfo.isblk():
2202 self.makedev(tarinfo, targetpath)
2203 elif tarinfo.islnk() or tarinfo.issym():
2204 self.makelink(tarinfo, targetpath)
2205 elif tarinfo.type not in SUPPORTED_TYPES:
2206 self.makeunknown(tarinfo, targetpath)
2207 else:
2208 self.makefile(tarinfo, targetpath)
2209
2210 self.chown(tarinfo, targetpath)
2211 if not tarinfo.issym():
2212 self.chmod(tarinfo, targetpath)
2213 self.utime(tarinfo, targetpath)
2214
2215 #--------------------------------------------------------------------------
2216 # Below are the different file methods. They are called via
2217 # _extract_member() when extract() is called. They can be replaced in a
2218 # subclass to implement other functionality.
2219
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. A directory that already exists
       is accepted silently; any other error is propagated.
    """
    try:
        # Start with an owner-only mode; the real mode is set later in
        # _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as err:
        if err.errno == errno.EEXIST:
            return
        raise
2230
def makefile(self, tarinfo, targetpath):
    """Write the data of the archive member tarinfo to a new file
       called targetpath.

       The member's file object and the target file are both closed even
       if opening the target or copying the data raises.
    """
    source = self.extractfile(tarinfo)
    try:
        # bltn_open is the builtin open(); the module-level name `open'
        # refers to TarFile.open.
        with bltn_open(targetpath, "wb") as target:
            copyfileobj(source, target)
    finally:
        source.close()
2239
def makeunknown(self, tarinfo, targetpath):
    """Extract a member whose type is unknown: write it to targetpath
       as if it were a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2247
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. Raises ExtractError on systems
       whose os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002255
def makedev(self, tarinfo, targetpath):
    """Create the character or block device node targetpath. Raises
       ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the archived permission bits with the device type bit.
    if tarinfo.isblk():
        mode = tarinfo.mode | stat.S_IFBLK
    else:
        mode = tarinfo.mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2270
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       Raises ExtractError if the copy is needed but the link target
       cannot be found inside the archive.
    """
    # Errors that mean "links are not available on this platform":
    #   AttributeError      - no os.symlink / os.link
    #   NotImplementedError - e.g. Windows XP
    #   WindowsError (1314) - the required privilege is not held by the
    #                         client
    # WindowsError does not exist on every platform, so it is only added
    # when available; naming it directly in the except clause would raise
    # NameError elsewhere and hide the real error.
    link_errors = (AttributeError, NotImplementedError)
    try:
        link_errors += (WindowsError,)
    except NameError:
        pass

    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # See extract().
            if os.path.exists(tarinfo._link_target):
                os.link(tarinfo._link_target, targetpath)
            else:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
    except link_errors:
        # The link could not be created: extract a copy of the link
        # target instead. This must only happen on failure; doing it
        # after a successful link would overwrite the link just made.
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002299
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo. Nothing
       is done unless the pwd module is available and the effective user
       id is 0. Raises ExtractError if changing the owner fails.
    """
    # We have to be root to change ownership.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name first, then by numeric id, and finally
    # fall back to the group of the current process.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Same lookup order for the user.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002327
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath. Does
       nothing if the os module has no chmod(); raises ExtractError if
       the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002336
def utime(self, tarinfo, targetpath):
    """Set both the access and the modification time of targetpath to
       tarinfo.mtime. Does nothing if the os module has no utime();
       raises ExtractError if the call fails.
    """
    if hasattr(os, 'utime'):
        try:
            stamp = tarinfo.mtime
            os.utime(targetpath, (stamp, stamp))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002346
2347 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Each member that is returned from a freshly read header is also
       appended to self.members. ReadError is raised for an empty or
       truncated header at offset 0, for an invalid header at offset 0
       unless self.ignore_zeros is set, and for any SubsequentHeaderError.
    """
    self._check("ra")
    # A member parked in self.firstmember is handed out exactly once,
    # before any further header is read.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            # With ignore_zeros the block is skipped and reading is
            # retried; otherwise we fall through to the break below with
            # tarinfo still None.
            # NOTE(review): the retry does not seek to the new offset;
            # presumably fromtarfile() leaves the file position at the
            # following block -- confirm.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # An invalid header at the very start is an error.
                raise ReadError(str(e))
            # Past offset 0 the error is swallowed and tarinfo stays None.
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            # Always reported, regardless of the offset.
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more could be read: mark the member list as complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002393
2394 #--------------------------------------------------------------------------
2395 # Little helper methods:
2396
Lars Gustäbel1b512722010-06-03 12:45:16 +00002397 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002398 """Find an archive member by name from bottom to top.
2399 If tarinfo is given, it is used as the starting point.
2400 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002401 # Ensure that all members have been loaded.
2402 members = self.getmembers()
2403
Lars Gustäbel1b512722010-06-03 12:45:16 +00002404 # Limit the member search list up to tarinfo.
2405 if tarinfo is not None:
2406 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002407
Lars Gustäbel1b512722010-06-03 12:45:16 +00002408 if normalize:
2409 name = os.path.normpath(name)
2410
2411 for member in reversed(members):
2412 if normalize:
2413 member_name = os.path.normpath(member.name)
2414 else:
2415 member_name = member.name
2416
2417 if name == member_name:
2418 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002419
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002420 def _load(self):
2421 """Read through the entire archive file and look for readable
2422 members.
2423 """
2424 while True:
2425 tarinfo = self.next()
2426 if tarinfo is None:
2427 break
2428 self._loaded = True
2429
2430 def _check(self, mode=None):
2431 """Check if TarFile is still open, and if the operation's mode
2432 corresponds to TarFile's mode.
2433 """
2434 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +00002435 raise IOError("%s is closed" % self.__class__.__name__)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002436 if mode is not None and self.mode not in mode:
2437 raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002438
Lars Gustäbel1b512722010-06-03 12:45:16 +00002439 def _find_link_target(self, tarinfo):
2440 """Find the target member of a symlink or hardlink member in the
2441 archive.
2442 """
2443 if tarinfo.issym():
2444 # Always search the entire archive.
2445 linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
2446 limit = None
2447 else:
2448 # Search the archive before the link, because a hard link is
2449 # just a reference to an already archived file.
2450 linkname = tarinfo.linkname
2451 limit = tarinfo
2452
2453 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2454 if member is None:
2455 raise KeyError("linkname %r not found" % linkname)
2456 return member
2457
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002458 def __iter__(self):
2459 """Provide an iterator object.
2460 """
2461 if self._loaded:
2462 return iter(self.members)
2463 else:
2464 return TarIter(self)
2465
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002466 def _dbg(self, level, msg):
2467 """Write debugging output to sys.stderr.
2468 """
2469 if level <= self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002470 print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002471
def __enter__(self):
    """Enter the runtime context of a `with' statement: _check() raises
       IOError if the TarFile is already closed, otherwise the TarFile
       itself is returned.
    """
    self._check()
    return self
2475
2476 def __exit__(self, type, value, traceback):
2477 if type is None:
2478 self.close()
2479 else:
2480 # An exception occurred. We must not call close() because
2481 # it would try to write end-of-archive blocks and padding.
2482 if not self._extfileobj:
2483 self.fileobj.close()
2484 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002485# class TarFile
2486
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator that starts at the first member of tarfile.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def __next__(self):
        """Return the next member. While the archive is not completely
           loaded the member comes from TarFile.next(); once that returns
           nothing, the TarFile is flagged as _loaded and iteration ends.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances it can
            # happen that getmembers() is called during iteration,
            # which will cause TarIter to stop prematurely. In that
            # case continue from the member list by position.
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2522
2523# Helper classes for sparse file support
2524class _section:
2525 """Base class for _data and _hole.
2526 """
2527 def __init__(self, offset, size):
2528 self.offset = offset
2529 self.size = size
2530 def __contains__(self, offset):
2531 return self.offset <= offset < self.offset + self.size
2532
class _data(_section):
    """A section of a sparse file that holds data; realpos is stored
       alongside the offset and size of the section.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2539
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       _section; the class only serves to tell holes from data.
    """
2544
2545class _ringbuffer(list):
2546 """Ringbuffer class which increases performance
2547 over a regular list.
2548 """
2549 def __init__(self):
2550 self.idx = 0
2551 def find(self, offset):
2552 idx = self.idx
2553 while True:
2554 item = self[idx]
2555 if offset in item:
2556 break
2557 idx += 1
2558 if idx == len(self):
2559 idx = 0
2560 if idx == self.idx:
2561 # End of File
2562 return None
2563 self.idx = idx
2564 return item
2565
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002566#--------------------
2567# exported functions
2568#--------------------
def is_tarfile(name):
    """Report whether name can be opened as a tar archive: True if
       open() succeeds, False if it raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2579
# Keep the current `open' (presumably still the builtin at this point)
# reachable as bltn_open, because the next line rebinds the module-level
# name `open' to TarFile.open. The order of these two lines matters.
bltn_open = open
open = TarFile.open