blob: 86844932a501185627fe3245358e835a07f1966e [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Keyword placeholders of the form $...$ (presumably expanded by the
# version control system -- confirm before relying on their values).
__version__ = "$Revision$"
__date__ = "$Date$"
__cvsid__ = "$Id$"

# Human-maintained module metadata.
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000039
40#---------
41# Imports
42#---------
43import sys
44import os
45import shutil
46import stat
47import errno
48import time
49import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000050import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000051import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000052
if sys.platform == 'mac':
    # MacOS9 is rejected at import time.  In many places the module
    # assumes that substituting "/" by the local os.path.sep is enough
    # to convert pathnames, which does not work with the mac
    # rooted:path:name versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")

# grp and pwd are optional.  When they cannot be imported both names are
# bound to None so that the rest of the module can test for them.
try:
    import grp
    import pwd
except ImportError:
    grp = pwd = None
64
# Names made available by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
]
67
Georg Brandl1a3284e2007-12-02 09:40:06 +000068from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000069
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000070#---------------------------------------------------------
71# tar constants
72#---------------------------------------------------------
NUL = b"\0"                         # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20         # length of records

# Both magic strings are exactly 8 bytes long.  The GNU one is "ustar"
# followed by TWO spaces and a NUL; the spaces are spelled as \x20 so
# that they cannot be collapsed by whitespace-normalizing tools.
GNU_MAGIC = b"ustar\x20\x20\0"      # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"        # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000078
# Maximum lengths of the string fields of a header.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Type flags of the standard member types.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

# Type flags specific to GNU tar.
GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

# Type flags of extended headers.
XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that will be treated as a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# File types that are part of the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields in a pax header that are numbers, mapped to the type used to
# convert them; all other fields are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
138
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139#---------------------------------------------------------
140# Bits used in the mode field, values in octal.
141#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000              # symbolic link
S_IFREG = 0o100000              # regular file
S_IFBLK = 0o060000              # block device
S_IFDIR = 0o040000              # directory
S_IFCHR = 0o020000              # character device
S_IFIFO = 0o010000              # fifo

# Special permission bits.
TSUID = 0o4000                  # set UID on execution
TSGID = 0o2000                  # set GID on execution
TSVTX = 0o1000                  # reserved

# Owner permission bits.
TUREAD = 0o400                  # read by owner
TUWRITE = 0o200                 # write by owner
TUEXEC = 0o100                  # execute/search by owner

# Group permission bits.
TGREAD = 0o040                  # read by group
TGWRITE = 0o020                 # write by group
TGEXEC = 0o010                  # execute/search by group

# Permission bits for everybody else.
TOREAD = 0o004                  # read by other
TOWRITE = 0o002                 # write by other
TOEXEC = 0o001                  # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000162
163#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000164# initialization
165#---------------------------------------------------------
# Default encoding for names: the filesystem encoding, or "ascii" when
# the interpreter reports none.
ENCODING = sys.getfilesystemencoding()
ENCODING = "ascii" if ENCODING is None else ENCODING
Guido van Rossumd8faa362007-04-27 19:54:29 +0000169
170#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000171# Some useful functions
172#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000173
def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.

    The string is encoded with the given encoding and error handler,
    cut to at most length bytes and padded with NUL bytes up to length.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, b"\0")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179
def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.

    Everything from the first NUL byte on is dropped; the remaining
    bytes are decoded with the given encoding and error handler.
    """
    head, _, _ = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000187
def nti(s):
    """Convert a number field to a python number.

    s holds the raw bytes of the field.  There are two possible
    encodings for a number field (see itn()): a NUL-terminated string
    of octal digits, or a leading 0o200 byte followed by a big-endian
    binary representation.

    Raises InvalidHeaderError if an octal field cannot be parsed.
    """
    # Indexing a bytes object yields an int, so the marker byte has to
    # be compared with the integer 0o200.  A comparison with chr(0o200)
    # is never equal and would make the binary branch unreachable.
    if s[0] != 0o200:
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        n = 0
        # The bytes after the marker are the value, most significant
        # byte first.
        for byte in s[1:]:
            n = (n << 8) + byte
    return n
204
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    The result is digits bytes long.  ValueError is raised when n does
    not fit into the field for the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    field = bytearray()
    for _ in range(width):
        # Prepend the current lowest byte, so the most significant
        # byte ends up first.
        field.insert(0, n & 0o377)
        n >>= 8
    field.insert(0, 0o200)
    return field
231
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # Bytes 148..155 are the chksum field itself; its eight spaces
    # contribute 8 * 32 == 256 to either sum.
    before, after = buf[:148], buf[156:512]
    totals = []
    for code in ("B", "b"):
        values = struct.unpack("148" + code, before) + struct.unpack("356" + code, after)
        totals.append(256 + sum(values))
    unsigned_chksum, signed_chksum = totals
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000244
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       IOError is raised when src runs out of data before length bytes
       have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    CHUNK = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means premature EOF.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    whole, rest = divmod(length, CHUNK)
    for _ in range(whole):
        transfer(CHUNK)
    if rest != 0:
        transfer(rest)
    return
269
# One entry per character of the mode string built by filemode().  Each
# entry lists (bits, char) candidates in the order they are tried.
filemode_table = (
    # file type
    (
        (S_IFLNK, "l"),
        (S_IFREG, "-"),
        (S_IFBLK, "b"),
        (S_IFDIR, "d"),
        (S_IFCHR, "c"),
        (S_IFIFO, "p"),
    ),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    (
        (TUEXEC | TSUID, "s"),
        (TSUID, "S"),
        (TUEXEC, "x"),
    ),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    (
        (TGEXEC | TSGID, "s"),
        (TSGID, "S"),
        (TGEXEC, "x"),
    ),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    (
        (TOEXEC | TSVTX, "t"),
        (TSVTX, "T"),
        (TOEXEC, "x"),
    ),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000296
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    # For every position take the character of the first candidate
    # whose bits are all set in mode, or "-" if none matches.
    return "".join(
        next((char for bit, char in table if mode & bit == bit), "-")
        for table in filemode_table
    )
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000311
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000345
346#---------------------------
347# internal stream interface
348#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name through os.open().

        mode is "r" (read-only) or "w" (write-only; the file is created
        if necessary and truncated).  Any other mode raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # Request binary mode where the platform defines the flag.
        flags |= getattr(os, "O_BINARY", 0)
        # Pass the creation mode explicitly.  Without it os.open() uses
        # its default of 0o777 and new archives would be created with
        # the executable bits set.
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Read and return at most size bytes from the descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the bytes s to the descriptor."""
        os.write(self.fd, s)
372
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; used to open the file when fileobj is
                    None and for the gzip header when writing
        mode     -- "r" or "w"
        comptype -- "tar", "gz", "bz2", or "*" to detect the
                    compression from the first block of the stream
        fileobj  -- object to read from / write to, or None
        bufsize  -- number of bytes transferred per access to fileobj

        Raises CompressionError if the required compression module is
        not available.
        """
        # Only a file object opened here is closed by close().
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False
        # Exceptions of the decompressor that _read() reports as
        # ReadError.
        self._decompress_errors = (IOError,)

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32(b"")
            # zlib signals corrupt data with zlib.error.
            self._decompress_errors = (IOError, zlib.error)
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = b""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # "closed" is missing when __init__ failed early.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and trailer
        # are written by this class.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write the bytes s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write the bytes s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            # Release a file object opened by __init__ and mark the
            # stream closed even if the final flush or write failed.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Raises ReadError if the stream does not start with the gzip
        magic and CompressionError for a compression method other than
        deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: a two byte little-endian length, then the
            # data.  It is part of the uncompressed gzip header, so it
            # must be skipped with the raw __read(); read() would feed
            # it to the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated field (the file name, cf. _init_write_gz).
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Another NUL-terminated field.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raises StreamError if pos lies before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes objects and need a bytes separator.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream (fewer at EOF).

        Raises ReadError if the compressed data cannot be decompressed.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self._decompress_errors:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
598
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block for inspection."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block read by __init__(), whatever size is.

        This works only once: the method replaces itself with
        fileobj.read, so later calls go to the file object directly.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on how the first
           block starts.
        """
        if self.buf.startswith(b"\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit for the block
        # size ("1" to "9") and the block magic "1AY&SY".  The digit is
        # skipped so that every compression level is recognised, not
        # only level 9.
        if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class _StreamProxy
622
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes requested from fileobj per read.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing."""
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start at position 0 with a fresh (de)compressor.

        In read mode the file object is rewound as well.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = b""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of decompressed data."""
        x = len(self.buf)
        while x < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # The end of the bz2 stream was reached in an earlier
                # block; whatever follows it is not archive data.
                break
            self.buf += data
            x += len(data)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to the uncompressed position pos.

        Going backwards restarts decompression from the beginning;
        going forwards reads and discards the data in between.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current uncompressed position."""
        return self.pos

    def write(self, data):
        """Compress data and pass the result on to the file object."""
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush the compressor in write mode.

        The wrapped file object itself is left open.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
# class _BZ2Proxy
682
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000683#------------------------
684# Extraction file object
685#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """Expose size bytes of fileobj starting at offset.

        sparse, if not None, is used to look up the section of a
        sparse file that covers a given position.
        """
        self.fileobj, self.offset, self.size = fileobj, offset, size
        self.sparse = sparse
        self.position = 0

    def seekable(self):
        """Return whether the underlying file object is seekable."""
        try:
            probe = self.fileobj.seekable
        except AttributeError:
            # XXX gzip.GzipFile and bz2.BZ2File
            return True
        return probe()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.

        At most size bytes are returned, and never more than what is
        left up to the end of the member; without size everything up
        to the end is returned.
        """
        remaining = self.size - self.position
        size = remaining if size is None else min(size, remaining)
        reader = self.readnormal if self.sparse is None else self.readsparse
        return reader(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        target = self.offset + self.position
        self.fileobj.seek(target)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        pieces = []
        wanted = size
        while wanted > 0:
            piece = self.readsparsesection(wanted)
            if not piece:
                break
            wanted -= len(piece)
            pieces.append(piece)
        return b"".join(pieces)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)
        if section is None:
            return b""

        # Never read beyond the end of the section.
        length = min(size, section.offset + section.size - self.position)

        if not isinstance(section, _data):
            # Sections that hold no data read as NUL bytes.
            self.position += length
            return NUL * length

        realpos = section.realpos + self.position - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position += length
        return self.fileobj.read(length)
#class _FileInFile
766
767
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       Data is pulled from a _FileInFile wrapper around the archive's
       file object. readline() reads ahead in chunks of `blocksize'
       bytes and keeps the surplus in self.buffer; read() consumes
       that buffer first. self.position is the logical position
       reported by tell().
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Set up reading of the member described by tarinfo from
           the archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   tarinfo.sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = b""

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.

           Raises ValueError if the file object has been closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "read everything", as for other
        # file objects. Without this, the slices below would be taken
        # with a negative index and silently drop buffered data.
        if size is not None and size < 0:
            size = None

        buf = b""
        if self.buffer:
            # Serve data left over from a previous readline() first.
            if size is None:
                buf = self.buffer
                self.buffer = b""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a bytes object with at most that
           size, which may be an incomplete line. A size of None or
           any negative value means no limit.

           Raises ValueError if the file object has been closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # no newline found in the buffered data, read ahead
            # until one shows up or the data is exhausted.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line length.
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file. The resulting position
           is clamped to the range 0..self.size.

           Raises ValueError if the file object has been closed or
           whence is not one of os.SEEK_SET, os.SEEK_CUR, os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # The read-ahead buffer belongs to the old position.
        self.buffer = b""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000904#class ExFileObject
905
906#------------------
907# Exported Classes
908#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 0o7777 and the name of a
           directory member gets a trailing slash.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a bytes object made of BLOCKSIZE
           byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
           or PAX_FORMAT); encoding and errors are used to encode the
           string fields for the ustar and GNU flavours. Raises
           ValueError for any other format.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname exceeds LENGTH_LINK or
           the name cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

           Over-long link names and names are emitted ahead of the
           regular header as GNU long link / long name members.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that are floats; those are moved into the pax header.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if there
           is no such slash or the remaining name part is still
           longer than LENGTH_NAME.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the slash that separates prefix and name.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field placeholder. It has to be exactly 8 bytes
            # wide: the fields before it add up to 148 bytes, the
            # checksum is patched in at offset 148 below and frombuf()
            # reads the type byte at offset 156.
            b" " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # struct.pack() pads the joined fields with NULs up to BLOCKSIZE.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the placeholder with the
        # checksum as six octal digits followed by a NUL.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        records = b""
        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The length field counts its own digits as well, so
            # iterate until the total length stops changing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raises EmptyHeaderError for an empty buffer,
           TruncatedHeaderError if the buffer is not BLOCKSIZE bytes
           long, EOFHeaderError if it consists of NULs only and
           InvalidHeaderError if the checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises SubsequentHeaderError if the header that follows
           is missing or bad.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])

        # Transform the sparse structures to something we can use
        # in ExFileObject.
        self.sparse = _ringbuffer()
        lastpos = 0
        realpos = 0
        for offset, numbytes in structs:
            if offset > lastpos:
                self.sparse.append(_hole(lastpos, offset - lastpos))
            self.sparse.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
        if lastpos < origsize:
            self.sparse.append(_hole(lastpos, origsize - lastpos))

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.

           Raises InvalidHeaderError if a record announces a length
           of zero and SubsequentHeaderError if the header that
           follows is missing or bad.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would never advance pos and make
                # this loop spin forever on a crafted archive.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.

           Only keywords listed in PAX_FIELDS are applied. A value
           for a number field that cannot be converted becomes 0.
        """
        for keyword, value in pax_headers.items():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1470# class TarInfo
1471
1472class TarFile(object):
1473 """The TarFile Class provides an interface to tar archives.
1474 """
1475
# Class-level defaults.  __init__() overrides most of them per instance
# when the corresponding keyword argument is not None.

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.
                            # extract() re-raises EnvironmentError when
                            # errorlevel > 0 and ExtractError when
                            # errorlevel > 1.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.
                            # When left as None, __init__() picks
                            # "replace" for mode 'r' and "strict"
                            # otherwise.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The default ExFileObject class to use.
1497
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining keyword arguments replace the class-level defaults
       of the same name when they are not None. `pax_headers' is only
       honoured if the resulting format is PAX_FORMAT.

       Raises ValueError for an invalid `mode' and ReadError if an
       archive opened for appending contains an invalid header.
    """
    # Validate against the table itself. A substring test such as
    # `mode in "raw"' also accepts the empty string, which would then
    # surface as a KeyError from the lookup instead of a ValueError.
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        # The caller owns this file object; close() must leave it open.
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding

    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "replace"
    else:
        self.errors = "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately bare: whatever went wrong, release a file we
        # opened ourselves, then let the exception propagate unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001599
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001600 #--------------------------------------------------------------------------
1601 # Below are the classmethods which act as alternate constructors to the
1602 # TarFile class. The open() method is the only one that is needed for
1603 # public use; it is the "super"-constructor and is able to select an
1604 # adequate "sub"-constructor for a particular compression using the mapping
1605 # from OPEN_METH.
1606 #
1607 # This concept allows one to subclass TarFile without losing the comfort of
1608 # the super-constructor. A sub-constructor is registered and made available
1609 # by adding it to the mapping in OPEN_METH.
1610
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       `bufsize' is passed to the stream object used for the '|' modes;
       all other keyword arguments are handed to the selected constructor.

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener accepts
       the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so that the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against whole modes; a substring test would also
        # accept "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # The constructor treats the stream as an external file
            # object and will not close it on failure, so do it here.
            stream.close()
            raise
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001683
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be 'r', 'a' or 'w', otherwise ValueError is raised.
       All other arguments are passed to the class constructor.
    """
    # Compare against whole modes: a substring test (`mode in "raw"')
    # would let the empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001691
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a `mode' other than 'r' or 'w',
       CompressionError if the gzip module is unusable and ReadError
       if opening the archive fails with an IOError.
    """
    # Compare against whole modes: a substring test (`mode in "rw"')
    # would let the empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the raw file is ours, so that it can be
    # released again if the archive turns out to be unreadable.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1717
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a `mode' other than 'r' or 'w',
       CompressionError if the bz2 module cannot be imported and
       ReadError if opening the archive fails with an IOError or
       EOFError.
    """
    # Compare against whole modes: a substring test (`mode in "rw"')
    # would let the empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Remember whether the BZ2File is ours, so that it can be released
    # again if the archive turns out to be unreadable.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1742
# All *open() methods are registered here. Each key is a compression
# type as it appears in the mode strings accepted by open(); each value
# is the name of the classmethod that open() looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1749
1750 #--------------------------------------------------------------------------
1751 # The public methods which TarFile provides:
1752
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. A
       file object supplied by the caller is left open.
    """
    if self.closed:
        return

    # Mark the archive closed first and release the file in a finally
    # clause, so that a failing write cannot leak the file object.
    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1772
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised if the archive has no such member. If a member
       occurs more than once in the archive, its last occurrence is
       assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001783
def getmembers(self):
    """Return a list of TarInfo objects, one per archive member, in the
       order in which the members appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires scanning the whole archive once.
    self._load()
    return self.members
1793
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001799
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing either the file `name' or the
       file object `fileobj' (using os.fstat on its file descriptor).
       The returned object may be modified before it is passed to
       addfile(). If given, `arcname' specifies an alternative name for
       the file in the archive. None is returned for file types that
       are not recognized.
    """
    self._check("aw")

    # An open file object takes precedence over `name'.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: drop the drive, use forward slashes
    # and make absolute paths relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Pick fstat, lstat or stat depending on what we were given, on
    # the platform and on whether symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference
                            and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # A hardlink to a file that is already in the archive.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            ftype = REGTYPE
            # Remember the inode only if it is valid;
            # for win32 it is always 0.
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Copy everything the stat result tells us into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size if stat.S_ISREG(stmd) else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname

    # User and group names are optional; unknown ids are left alone.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1897
def list(self, verbose=True):
    """Write a table of contents to sys.stdout, one member per line.
       With `verbose' False only the member names are printed; with
       `verbose' True an `ls -l'-like line is produced.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            print(filemode(tarinfo.mode), end=' ')
            owner = "%s/%s" % (tarinfo.uname or tarinfo.uid,
                               tarinfo.gname or tarinfo.gid)
            print(owner, end=' ')
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % tarinfo.size, end=' ')
            stamp = time.localtime(tarinfo.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        suffix = "/" if tarinfo.isdir() else ""
        print(tarinfo.name + suffix, end=' ')

        if verbose:
            if tarinfo.issym():
                print("->", tarinfo.linkname, end=' ')
            if tarinfo.islnk():
                print("link to", tarinfo.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001926
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded; passing it emits a
       DeprecationWarning in favour of `filter'. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                      DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # The with statement closes the source file even if writing
        # it to the archive fails.
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
1988
def addfile(self, tarinfo, fileobj=None):
    """Append the TarInfo object `tarinfo' to the archive. If `fileobj'
       is given, tarinfo.size bytes are read from it and written after
       the header. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object is not the one stored
    # in the member list.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        whole, partial = divmod(tarinfo.size, BLOCKSIZE)
        if partial > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - partial))
            whole += 1
        self.offset += whole * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002014
def extractall(self, path=".", members=None):
    """Extract every member of the archive below `path' (the current
       working directory by default). Owner, modification time and
       permissions of directories are applied only after all members
       have been extracted. `members' is optional and must be a subset
       of the list returned by getmembers().
    """
    if members is None:
        members = self

    deferred = []
    for tarinfo in members:
        if tarinfo.isdir():
            # Remember the original and extract a copy with a safe mode.
            deferred.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path)

    # Handle directories in reverse name order.
    deferred.sort(key=lambda a: a.name)
    deferred.reverse()

    # Now apply the real owner, mtime and filemode to the directories.
    for tarinfo in deferred:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2051
def extract(self, member, path=""):
    """Extract one member of the archive below `path' (the current
       working directory by default), using its full name. Its file
       information is extracted as accurately as possible. `member'
       may be a filename or a TarInfo object.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        # Errors are only reported through the debug channel.
        if e.filename is None:
            message = "tarfile: %s" % e.strerror
        else:
            message = "tarfile: %s %r" % (e.strerror, e.filename)
        self._dbg(1, message)
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2084
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may
       be a filename or a TarInfo object. For a regular file a file-like
       object is returned; for a link the file-like object of the link's
       target is returned; for anything else None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Regular files and members of unknown type are both served as
    # plain file objects.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.), so there is nothing to hand out.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._getmember(tarinfo.linkname,
                                            tarinfo))
2123
def _extract_member(self, tarinfo, targetpath):
    """Write the member described by `tarinfo' to the filesystem at
       `targetpath', then apply owner, mode and modification time.
    """
    # Build the destination pathname: drop trailing slashes and use
    # the platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create all upper directories. Directories that are not part of
    # the archive get default permissions.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Dispatch to the make*() method matching the member type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and time are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2165
2166 #--------------------------------------------------------------------------
2167 # Below are the different file methods. They are called via
2168 # _extract_member() when extract() is called. They can be replaced in a
2169 # subclass to implement other functionality.
2170
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

       An already existing targetpath is not treated as an error; every
       other failure of os.mkdir() is propagated to the caller.
    """
    try:
        # Create the directory with a restrictive mode first; the mode
        # stored in the archive is applied later by _extract_member().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2181
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The data of the member is obtained through extractfile() and
       copied to targetpath, which is opened in binary write mode.
       Both file objects are closed even if opening the target or
       copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # bltn_open is the builtin open(); the module-level name open
        # is rebound to TarFile.open.
        with bltn_open(targetpath, "wb") as target:
            copyfileobj(source, target)
    finally:
        source.close()
2190
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath.
    """
    # Members of unknown type are written out like regular files; the
    # fallback is reported at debug level 1.
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, note % tarinfo.type)
2198
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

       Raises ExtractError if the platform does not provide os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002206
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

       Raises ExtractError if the platform lacks os.mknod() or
       os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits of the member with the file type bit
    # for a block or character device.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
2221
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       Raises IOError if neither the link nor a copy could be created.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # os.symlink()/os.link() is not available (or the member has no
        # _link_target attribute): fall back to copying the target.
        if tarinfo.issym():
            # The target of a symbolic link is relative to the directory
            # that contains the link.  Empty components are skipped so
            # that a link in the archive root does not end up with an
            # absolute "/<linkname>" path.
            linkpath = "/".join(filter(None, (os.path.dirname(tarinfo.name),
                                              tarinfo.linkname)))
        else:
            linkpath = tarinfo.linkname

        try:
            # First choice: extract the referenced archive member again
            # under the name of the link.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the target from the file system.
            linkpath = linkpath.replace("/", os.sep)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError as exc:
                raise IOError("link could not be created") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002248
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       process runs with an effective user id of 0.  Raises
       ExtractError if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Group: look it up by name, then by numeric id, and fall back to
    # the group of the current process.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # User: same lookup order as for the group.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself.
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002276
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       Does nothing on platforms without os.chmod().  Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002285
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       Does nothing on platforms without os.utime().  Raises
       ExtractError if the time cannot be changed.
    """
    if hasattr(os, 'utime'):
        stamp = tarinfo.mtime
        try:
            # The archived mtime is used for both access and
            # modification time.
            os.utime(targetpath, (stamp, stamp))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002295
2296 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError when a header error occurs at offset 0 (invalid,
       empty or truncated header) and whenever fromtarfile() raises
       SubsequentHeaderError.  When the end of the archive is reached,
       the TarFile is marked as completely loaded.
    """
    # Only allowed on an open TarFile whose mode is "r" or "a".
    self._check("ra")
    # A member stored in firstmember is handed out first, exactly once.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            # With ignore_zeros the block is skipped and the next one is
            # tried; otherwise fall through to the break with tarinfo
            # still None.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            # Skipped like above with ignore_zeros.  Otherwise it is only
            # an error at the very start of the file; later on it ends
            # the iteration silently.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            # Reported only at offset 0; otherwise treated as the end.
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            # Reported only at offset 0; otherwise treated as the end.
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            # Always reported, regardless of the offset.
            raise ReadError(str(e))
        # Reached on success and on every handler that neither continued
        # nor raised.
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more to read: the member list is complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002342
2343 #--------------------------------------------------------------------------
2344 # Little helper methods:
2345
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name from bottom to top.
       If tarinfo is given, only the members in front of it are
       searched. Return None if no member matches.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        candidates = members
    else:
        # Restrict the search to the members preceding tarinfo.
        candidates = members[:members.index(tarinfo)]

    # Walk backwards so that the last matching entry wins.
    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002361
def _load(self):
    """Read through the entire archive file and look for readable
       members.
    """
    # Keep calling next() until it reports that nothing is left.
    while self.next() is not None:
        pass
    self._loaded = True
2371
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode.

       mode is a string of permitted mode characters; with None only
       the open state is checked. Raises IOError on failure.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002380
def __iter__(self):
    """Provide an iterator object.

       Once all members are loaded the member list is iterated
       directly; before that a TarIter is returned.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2388
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       The message is printed only if level does not exceed the
       debug attribute of the TarFile.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002394
def __enter__(self):
    """Enter the runtime context and return the TarFile itself.

       _check() is called without a mode, so only the open state is
       verified; it raises IOError for a closed TarFile.
    """
    self._check()
    return self
2398
def __exit__(self, type, value, traceback):
    """Leave the runtime context.

       Without a pending exception the TarFile is closed normally.
       Otherwise only the underlying file object is closed (unless it
       was supplied from outside) and the TarFile is marked as closed.
       The exception is not suppressed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002408# class TarFile
2409
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        # Position in tarfile.members, used once the archive is loaded.
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        tar = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if tar._loaded:
            # Everything has been read already: serve from the list.
            try:
                tarinfo = tar.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = tar.next()
            if not tarinfo:
                tar._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2445
2446# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.

       A section covers the half-open range of offsets
       [offset, offset + size).
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True if offset lies inside this section.
        start = self.offset
        return start <= offset < start + self.size
2455
class _data(_section):
    """Represent a data section in a sparse file.

       In addition to offset and size, realpos is stored on the
       instance.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2462
class _hole(_section):
    """Represent a hole section in a sparse file.

       Adds nothing to _section; the class only serves as a marker.
    """
2467
class _ringbuffer(list):
    """List that remembers the index of the last successful lookup.

       find() starts searching at that index and wraps around, so
       lookups with slowly increasing offsets do not rescan the list
       from the beginning each time.
    """
    def __init__(self):
        super().__init__()
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item containing offset, searching from the
           last hit and wrapping around once.  Return None if no item
           matches or the buffer is empty.
        """
        count = len(self)
        if not count:
            # Nothing to search; indexing would raise IndexError.
            return None

        # Clamp a stale start index into the valid range.
        start = self.idx % count
        for step in range(count):
            idx = (start + step) % count
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
        # End of File
        return None
2488
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002489#--------------------
2490# exported functions
2491#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       Only TarError is turned into False; other exceptions propagate.
    """
    try:
        # open is the module-level name, i.e. TarFile.open.
        open(name).close()
    except TarError:
        return False
    return True
2502
# Keep a reference to the builtin open() before the name is rebound on
# the next line; makefile() uses it to create the extracted files.
bltn_open = open
# From here on the module-level name open refers to TarFile.open, which
# is also what is_tarfile() calls.
open = TarFile.open