blob: 98b774a218b75698e0e79a11e459aaecee2eb77e [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Guido van Rossumd8faa362007-04-27 19:54:29 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000052import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000053import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    #
    # The exception is raised with call syntax; the older
    # ``raise ImportError, "..."`` form is a SyntaxError on Python 3.
    raise ImportError("tarfile does not work for platform==mac")
61
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000062try:
63 import grp, pwd
64except ImportError:
65 grp = pwd = None
66
67# from tarfile import *
68__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69
Guido van Rossum8f78fe92006-08-24 04:03:53 +000070from __builtin__ import open as _open # Since 'open' is TarFile.open
71
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000072#---------------------------------------------------------
73# tar constants
74#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# The GNU magic is "ustar" followed by TWO spaces and a NUL, which makes it
# 8 bytes long, the same length as POSIX_MAGIC ("ustar" + NUL + "00").
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000080
# Maximum field lengths of a header.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# One-byte type flags.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields in a pax header that are numbers (mapped to the converter used
# for them); all other fields are treated as strings.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
140
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000141#---------------------------------------------------------
142# Bits used in the mode field, values in octal.
143#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000              # symbolic link
S_IFREG = 0o100000              # regular file
S_IFBLK = 0o060000              # block device
S_IFDIR = 0o040000              # directory
S_IFCHR = 0o020000              # character device
S_IFIFO = 0o010000              # fifo

# Special permission bits.
TSUID = 0o4000                  # set UID on execution
TSGID = 0o2000                  # set GID on execution
TSVTX = 0o1000                  # reserved

# Owner permission bits.
TUREAD = 0o400                  # read by owner
TUWRITE = 0o200                 # write by owner
TUEXEC = 0o100                  # execute/search by owner
# Group permission bits.
TGREAD = 0o040                  # read by group
TGWRITE = 0o020                 # write by group
TGEXEC = 0o010                  # execute/search by group
# Other permission bits.
TOREAD = 0o004                  # read by other
TOWRITE = 0o002                 # write by other
TOEXEC = 0o001                  # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164
165#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000166# initialization
167#---------------------------------------------------------
# Default encoding for names: the filesystem encoding, or "ascii" when the
# interpreter reports none.
ENCODING = sys.getfilesystemencoding()
ENCODING = "ascii" if ENCODING is None else ENCODING
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
172#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000173# Some useful functions
174#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000175
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes; a shorter value is
    filled up with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields an empty padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000181
def nts(s, encoding, errors):
    """Decode the part of the bytes object s that precedes its first
       null byte (all of s if it contains none) and return it as a string.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000189
def nti(s):
    """Convert a number field to a python number.

    s is the raw bytes of the field. Raises HeaderError if the field is
    not in the base-256 encoding and does not hold a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    #
    # Indexing a bytes object yields an int, so the marker byte is compared
    # as a one-byte slice; comparing s[0] with chr(0o200) is never equal and
    # would send every field down the octal path.
    if s[:1] != b"\x80":
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise HeaderError("invalid header")
    else:
        # Base-256: the bytes after the marker are a big-endian number.
        n = 0
        for byte in bytearray(s[1:]):
            n = (n << 8) + byte
    return n
206
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    Returns a bytes object of length digits. Raises ValueError if n does
    not fit into the field for the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    else:
        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise OverflowError.
            n = struct.unpack("L", struct.pack("l", n))[0]

        # bytes objects are immutable and have no insert(); assemble the
        # field in a bytearray and convert it once it is complete.
        field = bytearray()
        for i in range(digits - 1):
            field.insert(0, n & 0o377)
            n >>= 8
        field.insert(0, 0o200)
        s = bytes(field)
    return s
233
def calc_chksums(buf):
    """Return the tuple (unsigned_chksum, signed_chksum) for the header
       block buf.

       Bytes 0-147 and 156-511 are summed up; the 8 bytes of the chksum
       field in between are counted as if they were spaces (8 * 32 = 256).
       According to the GNU tar sources, some tars (Sun and NeXT)
       calculate chksum with signed char, which gives a different result
       if there are chars in the buffer with the high bit set. That is
       why both variants are calculated.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself.
    as_unsigned = struct.unpack("148B8x356B", header)
    as_signed = struct.unpack("148b8x356b", header)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000246
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError("end of file reached") if src runs out of data
       before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means src is exhausted.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    whole, rest = divmod(length, BUFSIZE)
    for _ in range(whole):
        transfer(BUFSIZE)
    if rest != 0:
        transfer(rest)
    return
271
# One entry per character of the string built by filemode(). Each entry is
# a tuple of (bits, char) candidates that are tried in order, so a
# combination of bits must be listed before the single bits it contains.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000298
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # The first candidate whose bits are all set in mode decides the
        # character at this position; "-" is used if none matches.
        hit = next((char for bit, char in candidates if mode & bit == bit), "-")
        chars.append(hit)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000313
# normpath() always returns paths that use "/" as separator.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
318
class TarError(Exception):
    """Base class of all exceptions defined in this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000337
338#---------------------------
339# internal stream interface
340#---------------------------
341class _LowLevelFile:
342 """Low-level file object. Supports reading and writing.
343 It is used instead of a regular file object for streaming
344 access.
345 """
346
347 def __init__(self, name, mode):
348 mode = {
349 "r": os.O_RDONLY,
350 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
351 }[mode]
352 if hasattr(os, "O_BINARY"):
353 mode |= os.O_BINARY
354 self.fd = os.open(name, mode)
355
356 def close(self):
357 os.close(self.fd)
358
359 def read(self, size):
360 return os.read(self.fd, size)
361
362 def write(self, s):
363 os.write(self.fd, s)
364
365class _Stream:
366 """Class that serves as an adapter between TarFile and
367 a stream-like object. The stream-like object only
368 needs to have a read() or write() method and is accessed
369 blockwise. Use of gzip or bzip2 compression is possible.
370 A stream-like object could be for example: sys.stdin,
371 sys.stdout, a socket, a tape device etc.
372
373 _Stream is intended to be used only internally.
374 """
375
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000376 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000377 """Construct a _Stream object.
378 """
379 self._extfileobj = True
380 if fileobj is None:
381 fileobj = _LowLevelFile(name, mode)
382 self._extfileobj = False
383
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000384 if comptype == '*':
385 # Enable transparent compression detection for the
386 # stream interface
387 fileobj = _StreamProxy(fileobj)
388 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000389
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 self.name = name or ""
391 self.mode = mode
392 self.comptype = comptype
393 self.fileobj = fileobj
394 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000395 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000396 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000397 self.closed = False
398
399 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000400 try:
401 import zlib
402 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000403 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000404 self.zlib = zlib
405 self.crc = zlib.crc32("")
406 if mode == "r":
407 self._init_read_gz()
408 else:
409 self._init_write_gz()
410
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000411 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000412 try:
413 import bz2
414 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000415 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000416 if mode == "r":
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000417 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000418 self.cmp = bz2.BZ2Decompressor()
419 else:
420 self.cmp = bz2.BZ2Compressor()
421
422 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000423 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000424 self.close()
425
426 def _init_write_gz(self):
427 """Initialize for writing with gzip compression.
428 """
429 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
430 -self.zlib.MAX_WBITS,
431 self.zlib.DEF_MEM_LEVEL,
432 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000433 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000434 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000435 if self.name.endswith(".gz"):
436 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000437 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
438 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000439
440 def write(self, s):
441 """Write string s to the stream.
442 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000443 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000444 self.crc = self.zlib.crc32(s, self.crc)
445 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000446 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000447 s = self.cmp.compress(s)
448 self.__write(s)
449
450 def __write(self, s):
451 """Write string s to the stream if a whole new block
452 is ready to be written.
453 """
454 self.buf += s
455 while len(self.buf) > self.bufsize:
456 self.fileobj.write(self.buf[:self.bufsize])
457 self.buf = self.buf[self.bufsize:]
458
459 def close(self):
460 """Close the _Stream object. No operation should be
461 done on it afterwards.
462 """
463 if self.closed:
464 return
465
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000466 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000467 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000468
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000469 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000470 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000471 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000472 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473 # The native zlib crc is an unsigned 32-bit integer, but
474 # the Python wrapper implicitly casts that to a signed C
475 # long. So, on a 32-bit box self.crc may "look negative",
476 # while the same crc on a 64-bit box may "look positive".
477 # To avoid irksome warnings from the `struct` module, force
478 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000479 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
480 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000481
482 if not self._extfileobj:
483 self.fileobj.close()
484
485 self.closed = True
486
487 def _init_read_gz(self):
488 """Initialize for reading a gzip compressed fileobj.
489 """
490 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000491 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000492
493 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000494 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000495 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000496 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000497 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000498
499 flag = ord(self.__read(1))
500 self.__read(6)
501
502 if flag & 4:
503 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
504 self.read(xlen)
505 if flag & 8:
506 while True:
507 s = self.__read(1)
508 if not s or s == NUL:
509 break
510 if flag & 16:
511 while True:
512 s = self.__read(1)
513 if not s or s == NUL:
514 break
515 if flag & 2:
516 self.__read(2)
517
518 def tell(self):
519 """Return the stream's file pointer position.
520 """
521 return self.pos
522
523 def seek(self, pos=0):
524 """Set the stream's file pointer to pos. Negative seeking
525 is forbidden.
526 """
527 if pos - self.pos >= 0:
528 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000529 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000530 self.read(self.bufsize)
531 self.read(remainder)
532 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000533 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000534 return self.pos
535
536 def read(self, size=None):
537 """Return the next size number of bytes from the stream.
538 If size is not defined, return all bytes of the stream
539 up to EOF.
540 """
541 if size is None:
542 t = []
543 while True:
544 buf = self._read(self.bufsize)
545 if not buf:
546 break
547 t.append(buf)
548 buf = "".join(t)
549 else:
550 buf = self._read(size)
551 self.pos += len(buf)
552 return buf
553
554 def _read(self, size):
555 """Return size bytes from the stream.
556 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000557 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000558 return self.__read(size)
559
560 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000561 while c < size:
562 buf = self.__read(self.bufsize)
563 if not buf:
564 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000565 try:
566 buf = self.cmp.decompress(buf)
567 except IOError:
568 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000569 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000570 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000571 buf = self.dbuf[:size]
572 self.dbuf = self.dbuf[size:]
573 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000574
575 def __read(self, size):
576 """Return size bytes from stream. If internal buffer is empty,
577 read another block from the stream.
578 """
579 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000580 while c < size:
581 buf = self.fileobj.read(self.bufsize)
582 if not buf:
583 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000584 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000585 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000586 buf = self.buf[:size]
587 self.buf = self.buf[size:]
588 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000589# class _Stream
590
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000591class _StreamProxy(object):
592 """Small proxy class that enables transparent compression
593 detection for the Stream interface (mode 'r|*').
594 """
595
596 def __init__(self, fileobj):
597 self.fileobj = fileobj
598 self.buf = self.fileobj.read(BLOCKSIZE)
599
600 def read(self, size):
601 self.read = self.fileobj.read
602 return self.buf
603
604 def getcomptype(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000605 if self.buf.startswith(b"\037\213\010"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000606 return "gz"
Lars Gustäbela280ca752007-08-28 07:34:33 +0000607 if self.buf.startswith(b"BZh91"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000608 return "bz2"
609 return "tar"
610
611 def close(self):
612 self.fileobj.close()
613# class StreamProxy
614
Thomas Wouters477c8d52006-05-27 19:21:47 +0000615class _BZ2Proxy(object):
616 """Small proxy class that enables external file object
617 support for "r:bz2" and "w:bz2" modes. This is actually
618 a workaround for a limitation in bz2 module's BZ2File
619 class which (unlike gzip.GzipFile) has no support for
620 a file object argument.
621 """
622
623 blocksize = 16 * 1024
624
625 def __init__(self, fileobj, mode):
626 self.fileobj = fileobj
627 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000628 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000629 self.init()
630
631 def init(self):
632 import bz2
633 self.pos = 0
634 if self.mode == "r":
635 self.bz2obj = bz2.BZ2Decompressor()
636 self.fileobj.seek(0)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000637 self.buf = b""
Thomas Wouters477c8d52006-05-27 19:21:47 +0000638 else:
639 self.bz2obj = bz2.BZ2Compressor()
640
641 def read(self, size):
Thomas Wouters477c8d52006-05-27 19:21:47 +0000642 x = len(self.buf)
643 while x < size:
644 try:
645 raw = self.fileobj.read(self.blocksize)
646 data = self.bz2obj.decompress(raw)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000647 self.buf += data
Thomas Wouters477c8d52006-05-27 19:21:47 +0000648 except EOFError:
649 break
650 x += len(data)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000651
652 buf = self.buf[:size]
653 self.buf = self.buf[size:]
654 self.pos += len(buf)
655 return buf
656
657 def seek(self, pos):
658 if pos < self.pos:
659 self.init()
660 self.read(pos - self.pos)
661
662 def tell(self):
663 return self.pos
664
665 def write(self, data):
666 self.pos += len(data)
667 raw = self.bz2obj.compress(data)
668 self.fileobj.write(raw)
669
670 def close(self):
671 if self.mode == "w":
672 raw = self.bz2obj.flush()
673 self.fileobj.write(raw)
674 self.fileobj.close()
675# class _BZ2Proxy
676
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000677#------------------------
678# Extraction file object
679#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000680class _FileInFile(object):
681 """A thin wrapper around an existing file object that
682 provides a part of its data as an individual file
683 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000684 """
685
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000686 def __init__(self, fileobj, offset, size, sparse=None):
687 self.fileobj = fileobj
688 self.offset = offset
689 self.size = size
690 self.sparse = sparse
691 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000692
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000693 def seekable(self):
694 if not hasattr(self.fileobj, "seekable"):
695 # XXX gzip.GzipFile and bz2.BZ2File
696 return True
697 return self.fileobj.seekable()
698
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000699 def tell(self):
700 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000701 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000702 return self.position
703
704 def seek(self, position):
705 """Seek to a position in the file.
706 """
707 self.position = position
708
709 def read(self, size=None):
710 """Read data from the file.
711 """
712 if size is None:
713 size = self.size - self.position
714 else:
715 size = min(size, self.size - self.position)
716
717 if self.sparse is None:
718 return self.readnormal(size)
719 else:
720 return self.readsparse(size)
721
722 def readnormal(self, size):
723 """Read operation for regular files.
724 """
725 self.fileobj.seek(self.offset + self.position)
726 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000727 return self.fileobj.read(size)
728
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000729 def readsparse(self, size):
730 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000731 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000732 data = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000733 while size > 0:
734 buf = self.readsparsesection(size)
735 if not buf:
736 break
737 size -= len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000738 data += buf
739 return data
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000740
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000741 def readsparsesection(self, size):
742 """Read a single section of a sparse file.
743 """
744 section = self.sparse.find(self.position)
745
746 if section is None:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000747 return b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000748
749 size = min(size, section.offset + section.size - self.position)
750
751 if isinstance(section, _data):
752 realpos = section.realpos + self.position - section.offset
753 self.fileobj.seek(self.offset + realpos)
754 self.position += size
755 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000756 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000757 self.position += size
758 return NUL * size
759#class _FileInFile
760
761
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().

       Data is pulled from an underlying _FileInFile object. Bytes that
       readline() fetched beyond the end of the current line are kept in
       self.buffer; self.position counts only the bytes that were actually
       handed to the caller.
    """
    # Number of bytes readline() requests from the underlying file at a time.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member described by tarinfo inside
           the archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = b""

    def readable(self):
        """Return True, the object is always readable.
        """
        return True

    def writable(self):
        """Return False, the object is read-only.
        """
        return False

    def seekable(self):
        """Return whether the underlying file object is seekable.
        """
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is reached.
           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if size is None or size < 0:
            # Unbounded read: drain the look-ahead buffer first, then
            # everything that is left in the underlying file.
            buf = self.buffer + self.fileobj.read()
            self.buffer = b""
        else:
            # Serve as much as possible from the look-ahead buffer and
            # fetch only the missing part from the underlying file.
            buf = self.buffer[:size]
            self.buffer = self.buffer[size:]
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line. None or a negative
           size means no limit.
           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # pos is the index just behind the first newline, 0 if there is none.
        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # no newline found, keep reading blocks until one shows up
            # or the underlying file is exhausted.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # no newline found, return the rest of the data.
                        pos = len(self.buffer)
                    break

        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
           Raise ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file and return the new absolute
           position. The target is clamped to the range 0..self.size.
           Raise ValueError if the file is closed or whence is invalid.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = pos
        elif whence == os.SEEK_CUR:
            target = self.position + pos
        elif whence == os.SEEK_END:
            target = self.size + pos
        else:
            raise ValueError("Invalid argument")

        self.position = min(max(target, 0), self.size)

        # The look-ahead buffer belongs to the old position, drop it.
        self.buffer = b""
        self.fileobj.seek(self.position)
        return self.position

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
899
900#------------------
901# Exported Classes
902#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
           name and linkname are passed through normpath(), mode is
           masked with 0o7777 and directory names get a trailing slash.
        """
        info = {
            "name":     normpath(self.name),
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": normpath(self.linkname) if self.linkname else "",
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.
           format selects one of USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT,
           any other value raises ValueError. encoding and errors are
           handed to the ustar and GNU header creation; the pax variant
           does not take them.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
           Raise ValueError if the linkname is longer than LENGTH_LINK
           or the name cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
           Overlong link names and names are stored in extra longlink
           and longname headers that precede the actual header.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that the object's own pax_headers stay untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.
           Raise ValueError if no suitable "/" to split at exists.
        """
        # Look for the last "/" within the first LENGTH_PREFIX + 1 chars.
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
           Missing keys in info are replaced with defaults.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields with NULs up to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        # The checksum is calculated while the checksum field still
        # holds its 8 placeholder spaces.
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum field with six octal
        # digits and a NUL, the last placeholder space stays in place.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The payload is the encoded name followed by a NUL.
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        records = b""
        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the total stops changing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.
           Raise HeaderError if buf has the wrong length, consists
           only of NULs or carries a bad checksum.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        obj = cls()
        # Keep the raw block, _proc_sparse() reads further fields from it.
        obj.buf = buf
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile. Return None if no more data can be read.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        if not buf:
            return
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
           Raise HeaderError if no header follows the long name blocks.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        next_member = self.fromtarfile(tarfile)
        if next_member is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next_member.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next_member.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next_member.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next_member

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
           The resulting list of data and hole sections is stored
           in self.sparse and self.size is set to the original size.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386       # read position inside the header block
        lastpos = 0     # end of the previous section in the original file
        realpos = 0     # running total of the data sections' sizes
        # There are 4 possible sparse structs in the
        # first header.
        for i in range(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = bool(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # An extra header block is scanned for up to 21 structs.
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = bool(buf[504])

        # A hole at the end of the file is not listed explicitly.
        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the size from the header field at this
        # point, it is replaced by the original size afterwards.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.
           Raise HeaderError if a record declares a length of zero or if
           an extended header is not followed by another header.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        # buf holds bytes, so the pattern has to be a bytes pattern, too.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would never advance pos and make
                # this loop spin forever.
                raise HeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        next_member = self.fromtarfile(tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            if next_member is None:
                raise HeaderError("missing subsequent header")

            # Patch the TarInfo object with the extended header info.
            next_member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next_member.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next_member.offset_data
                if next_member.isreg() or next_member.type not in SUPPORTED_TYPES:
                    offset += next_member._block(next_member.size)
                tarfile.offset = offset

        return next_member

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
           encoding and errors are accepted but not used here.
        """
        for keyword, value in pax_headers.items():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                # Fall back to 0 if the value cannot be converted.
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    # Type checks, each one compares self.type with the type constants.
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1455
1456class TarFile(object):
1457 """The TarFile Class provides an interface to tar archives.
1458 """
1459
1460 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1461
1462 dereference = False # If true, add content of linked file to the
1463 # tar file, else the link.
1464
1465 ignore_zeros = False # If true, skips empty or invalid blocks and
1466 # continues processing.
1467
1468 errorlevel = 0 # If 0, fatal errors only appear in debug
1469 # messages (if debug >= 0). If > 0, errors
1470 # are passed to the caller as exceptions.
1471
Guido van Rossumd8faa362007-04-27 19:54:29 +00001472 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001473
Guido van Rossume7ba4952007-06-06 23:52:48 +00001474 encoding = ENCODING # Encoding for 8-bit character strings.
1475
1476 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001477
Guido van Rossumd8faa362007-04-27 19:54:29 +00001478 tarinfo = TarInfo # The default TarInfo class to use.
1479
1480 fileobject = ExFileObject # The default ExFileObject class to use.
1481
1482 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1483 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Guido van Rossume7ba4952007-06-06 23:52:48 +00001484 errors=None, pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001485 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1486 read from an existing archive, 'a' to append data to an existing
1487 file or 'w' to create a new file overwriting an existing one. `mode'
1488 defaults to 'r'.
1489 If `fileobj' is given, it is used for reading or writing data. If it
1490 can be determined, `mode' is overridden by `fileobj's mode.
1491 `fileobj' is not closed, when TarFile is closed.
1492 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001494 raise ValueError("mode must be 'r', 'a' or 'w'")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001495 self.mode = mode
1496 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497
1498 if not fileobj:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001499 if self.mode == "a" and not os.path.exists(name):
Thomas Wouterscf297e42007-02-23 15:07:44 +00001500 # Create nonexistent files in append mode.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001501 self.mode = "w"
1502 self._mode = "wb"
Guido van Rossume7ba4952007-06-06 23:52:48 +00001503 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001504 self._extfileobj = False
1505 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001506 if name is None and hasattr(fileobj, "name"):
1507 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001508 if hasattr(fileobj, "mode"):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001509 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001510 self._extfileobj = True
Guido van Rossumd8faa362007-04-27 19:54:29 +00001511 self.name = os.path.abspath(name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001512 self.fileobj = fileobj
1513
Guido van Rossumd8faa362007-04-27 19:54:29 +00001514 # Init attributes.
1515 if format is not None:
1516 self.format = format
1517 if tarinfo is not None:
1518 self.tarinfo = tarinfo
1519 if dereference is not None:
1520 self.dereference = dereference
1521 if ignore_zeros is not None:
1522 self.ignore_zeros = ignore_zeros
1523 if encoding is not None:
1524 self.encoding = encoding
Guido van Rossume7ba4952007-06-06 23:52:48 +00001525
1526 if errors is not None:
1527 self.errors = errors
1528 elif mode == "r":
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001529 self.errors = "replace"
Guido van Rossume7ba4952007-06-06 23:52:48 +00001530 else:
1531 self.errors = "strict"
1532
1533 if pax_headers is not None and self.format == PAX_FORMAT:
1534 self.pax_headers = pax_headers
1535 else:
1536 self.pax_headers = {}
1537
Guido van Rossumd8faa362007-04-27 19:54:29 +00001538 if debug is not None:
1539 self.debug = debug
1540 if errorlevel is not None:
1541 self.errorlevel = errorlevel
1542
1543 # Init datastructures.
Thomas Wouters477c8d52006-05-27 19:21:47 +00001544 self.closed = False
1545 self.members = [] # list of members as TarInfo objects
1546 self._loaded = False # flag if all members have been read
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001547 self.offset = 0 # current position in the archive file
Thomas Wouters477c8d52006-05-27 19:21:47 +00001548 self.inodes = {} # dictionary caching the inodes of
1549 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001550
Guido van Rossumd8faa362007-04-27 19:54:29 +00001551 if self.mode == "r":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001552 self.firstmember = None
1553 self.firstmember = self.next()
1554
Guido van Rossumd8faa362007-04-27 19:54:29 +00001555 if self.mode == "a":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001556 # Move to the end of the archive,
1557 # before the first empty block.
1558 self.firstmember = None
1559 while True:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001560 if self.next() is None:
Thomas Wouterscf297e42007-02-23 15:07:44 +00001561 if self.offset > 0:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001562 self.fileobj.seek(self.fileobj.tell() - BLOCKSIZE)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001563 break
1564
Guido van Rossumd8faa362007-04-27 19:54:29 +00001565 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001566 self._loaded = True
1567
Guido van Rossume7ba4952007-06-06 23:52:48 +00001568 if self.pax_headers:
Lars Gustäbel3741eff2007-08-21 12:17:05 +00001569 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
Guido van Rossumd8faa362007-04-27 19:54:29 +00001570 self.fileobj.write(buf)
1571 self.offset += len(buf)
1572
def _getposix(self):
    """Getter for the deprecated `posix' attribute: True if the archive
       format is USTAR_FORMAT.
    """
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Setter for the deprecated `posix' attribute. A true `value' selects
       USTAR_FORMAT, a false one GNU_FORMAT. Always emits a
       DeprecationWarning.
    """
    import warnings
    # stacklevel=2 attributes the warning to the code assigning the
    # attribute instead of to this setter.
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  stacklevel=2)
    if value:
        self.format = USTAR_FORMAT
    else:
        self.format = GNU_FORMAT

# Deprecated boolean view on self.format; use the format attribute instead.
posix = property(_getposix, _setposix)
1583
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001584 #--------------------------------------------------------------------------
1585 # Below are the classmethods which act as alternate constructors to the
1586 # TarFile class. The open() method is the only one that is needed for
1587 # public use; it is the "super"-constructor and is able to select an
1588 # adequate "sub"-constructor for a particular compression using the mapping
1589 # from OPEN_METH.
1590 #
1591 # This concept allows one to subclass TarFile without losing the comfort of
1592 # the super-constructor. A sub-constructor is registered and made available
1593 # by adding it to the mapping in OPEN_METH.
1594
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       `bufsize' is passed on to the stream wrapper used by the '|' modes;
       all other keyword arguments are handed to the selected constructor.

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered *open() method is
       able to read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # Every failed attempt must leave a caller-supplied file object
        # where it was, so remember the position once up front.
        if fileobj is not None:
            saved_pos = fileobj.tell()
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against the exact mode strings; a substring test would
        # also accept "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        # The stream wrapper was created here, so close() has to close it.
        t._extfileobj = False
        return t

    # Exact comparison: with a substring test the empty string (and "aw")
    # would be treated as a valid mode and fail much later.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001667
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive `name' for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. All arguments are passed on to the class constructor.
    """
    # Exact comparison instead of a substring test, which would let the
    # empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001675
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       If `fileobj' is None, the file `name' is opened here in binary
       mode. Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is unusable and ReadError if
       an IOError occurs while opening the archive.
    """
    # Exact comparison instead of a substring test, which would let the
    # empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except BaseException as exc:
        # Do not leak the file opened above; a file object supplied by
        # the caller is left alone.
        if opened_here:
            fileobj.close()
        if isinstance(exc, IOError):
            raise ReadError("not a gzip file")
        raise
    # The gzip wrapper was created here, so close() has to close it.
    t._extfileobj = False
    return t
1701
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       A given `fileobj' is wrapped in a _BZ2Proxy, otherwise the file
       `name' is opened with bz2.BZ2File. Raises ValueError for a mode
       other than 'r' or 'w', CompressionError if the bz2 module is
       missing and ReadError if an IOError occurs while opening the
       archive.
    """
    # Exact comparison instead of a substring test, which would let the
    # empty string through.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Do not leak the BZ2File opened above; a proxy around the
        # caller's file object is left alone.
        if opened_here:
            fileobj.close()
        if isinstance(exc, IOError):
            raise ReadError("not a bzip2 file")
        raise
    # The bzip2 wrapper was created here, so close() has to close it.
    t._extfileobj = False
    return t
1726
# Registry of all *open() methods: maps a compression type to the name
# of the classmethod that opens archives of that type.
OPEN_METH = dict(
    tar="taropen",    # uncompressed tar
    gz="gzopen",      # gzip compressed tar
    bz2="bz2open",    # bzip2 compressed tar
)
1733
1734 #--------------------------------------------------------------------------
1735 # The public methods which TarFile provides:
1736
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros to a
       multiple of RECORDSIZE. A file object that was passed in from
       outside is not closed. Calling close() again is a no-op.
    """
    if self.closed:
        return

    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file even if writing the trailer failed, so that
        # a failing close() does not leak the underlying file object.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1756
def getmember(self, name):
    """Look up the member `name' and return its TarInfo object. The
       lookup itself is delegated to _getmember(); if that yields None,
       KeyError is raised.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001767
def getmembers(self):
    """Return the list of TarInfo objects held in self.members. If the
       archive has not been read completely yet, it is loaded first.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete listing requires one scan over the whole archive.
    self._load()
    return self.members
1777
def getnames(self):
    """Return a list with the name of every member, in the order in
       which getmembers() returns the members.
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001783
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing the file `name' or, if given,
       the open file object `fileobj' (examined with os.fstat on its
       file descriptor). `arcname' optionally sets the name the member
       gets inside the archive. The returned object may be modified
       before it is handed to addfile(). None is returned for file types
       that are not recognized.
    """
    self._check("aw")

    # A file object takes precedence: use its real name.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalize it, drop a drive prefix and
    # strip leading slashes so that it is relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    while arcname[:1] == "/":
        arcname = arcname[1:]

    # The object that is going to be filled in below.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Stat the file; symlinks are only followed when lstat is missing
    # or dereferencing was requested.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        seen_before = (not self.dereference and statres.st_nlink > 1
                       and inode in self.inodes
                       and arcname != self.inodes[inode])
        if seen_before:
            # A hardlink to a file that was archived already.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            member_type = REGTYPE
            # Only remember valid inodes (always 0 on win32).
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything the stat result provides into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if stat.S_ISREG(stmd) else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname

    # Symbolic owner names are optional: ids without a database entry
    # simply keep the TarInfo's existing uname/gname.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    is_device = member_type in (CHRTYPE, BLKTYPE)
    if is_device and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1882
def list(self, verbose=True):
    """Print one line per member to sys.stdout. With `verbose' set to
       False only the member names are printed, otherwise an
       `ls -l'-like line (mode, owner/group, size or device numbers,
       modification time, name and link target) is produced.
    """
    self._check()

    def emit(*fields):
        # Every column is followed by a single blank.
        print(*fields, end=' ')

    for tarinfo in self:
        if verbose:
            emit(filemode(tarinfo.mode))
            emit("%s/%s" % (tarinfo.uname or tarinfo.uid,
                            tarinfo.gname or tarinfo.gid))
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                emit("%10s" % device)
            else:
                emit("%10d" % tarinfo.size)
            stamp = time.localtime(tarinfo.mtime)[:6]
            emit("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        label = tarinfo.name
        if tarinfo.isdir():
            label = label + "/"
        emit(label)

        if verbose:
            if tarinfo.issym():
                emit("->", tarinfo.linkname)
            if tarinfo.islnk():
                emit("link to", tarinfo.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001911
def add(self, name, arcname=None, recursive=True, exclude=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded.

       The archive itself, excluded names and files for which
       gettarinfo() returns None are skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None and exclude(name):
        self._dbg(2, "tarfile: Excluded %r" % name)
        return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f), recursive, exclude)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        # Close the source file even if addfile() fails, so that an
        # error while writing does not leak the file handle.
        try:
            self.addfile(tarinfo, f)
        finally:
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude)

    else:
        self.addfile(tarinfo)
1968
def addfile(self, tarinfo, fileobj=None):
    """Write the header built from the TarInfo object `tarinfo' to the
       archive. If `fileobj' is given, tarinfo.size bytes are copied
       from it and the data is padded with NULs up to a full block.
       A copy of `tarinfo' is appended to the list of members.
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, the caller's object stays untouched.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    if fileobj is not None:
        # Payload follows: copy it and round up to whole blocks.
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001994
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while setting directory attributes is
       re-raised if self.errorlevel > 1, otherwise it is only reported
       through the debug output.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), 0o700)
            except EnvironmentError:
                # Best effort, e.g. the directory exists already.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories, so that subdirectories are handled
    # before their parents. (list.sort() takes no comparison function
    # in Python 3, hence the key function.)
    directories.sort(key=lambda a: a.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Keep `path' untouched: rebinding it here would build every
        # following directory path on top of the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2035
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.

       An EnvironmentError is re-raised if self.errorlevel > 0, an
       ExtractError if self.errorlevel > 1; otherwise the error is only
       reported through the debug output.
    """
    self._check("r")

    # `basestring' does not exist in Python 3; member names are str.
    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        else:
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2068
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()

       Raises StreamError if a (sym)link is requested from a stream and
       KeyError if a member name or a link target cannot be found.
    """
    self._check("r")

    # `basestring' does not exist in Python 3; member names are str.
    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    if tarinfo.isreg():
        return self.fileobject(self, tarinfo)

    elif tarinfo.type not in SUPPORTED_TYPES:
        # If a member's type is unknown, it is treated as a
        # regular file.
        return self.fileobject(self, tarinfo)

    elif tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        else:
            # A (sym)link's file object is its target's file object.
            target = self._getmember(tarinfo.linkname, tarinfo)
            if target is None:
                # Fail with a meaningful error instead of recursing
                # with None and dying on an AttributeError.
                raise KeyError("linkname %r not found" % tarinfo.linkname)
            return self.extractfile(target)
    else:
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None
2107
def _extract_member(self, tarinfo, targetpath):
    """Create the physical file `targetpath' from the TarInfo object
       `tarinfo' by dispatching to the make*() method matching its type,
       then apply owner and (except for symlinks) mode and mtime.
    """
    # Strip one trailing slash and convert the path to the
    # platform specific form.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Make sure that the parent directories exist.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        note = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        note = tarinfo.name
    self._dbg(1, note)

    # Pick the method that creates this kind of member.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2148
2149 #--------------------------------------------------------------------------
2150 # Below are the different file methods. They are called via
2151 # _extract_member() when extract() is called. They can be replaced in a
2152 # subclass to implement other functionality.
2153
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing
       `targetpath' is not an error, every other failure is passed on.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError as exc:
        already_there = exc.errno == errno.EEXIST
        if not already_there:
            raise
2162
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath: the member's data, obtained via
       extractfile(), is copied into a newly created file. Both file
       objects are closed even if opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2171
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' the same way
       as a regular file and report that through the debug output.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
2179
def makefifo(self, tarinfo, targetpath):
    """Create the fifo `targetpath'. Raises ExtractError if the
       platform provides no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002187
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath' from the mode
       and the device numbers stored in `tarinfo'. Raises ExtractError
       if the platform lacks os.mknod() or os.makedev().
    """
    supported = hasattr(os, "mknod") and hasattr(os, "makedev")
    if not supported:
        raise ExtractError("special devices not supported by system")

    # Add the file type bits to the permission bits.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2202
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'. If creating the
       link fails with an AttributeError (e.g. the os module provides
       no link functions), the referenced member is extracted in its
       place, and failing that the referenced file is copied from the
       filesystem. IOError is raised if that copy fails as well.
    """
    link_source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(link_source, targetpath)
        else:
            # _link_target has been prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # A symlink's target is relative to the directory
            # of the link itself.
            link_dir = os.path.dirname(tarinfo.name)
            link_source = normpath(os.path.join(link_dir, link_source))

        try:
            referenced = self.getmember(link_source)
            self._extract_member(referenced, targetpath)
        except (EnvironmentError, KeyError):
            link_source = os.path.normpath(link_source)
            try:
                shutil.copy2(link_source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002229
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo.

       Nothing happens unless the pwd module is available and the
       effective user id is 0. Group and user are resolved by name
       first, then by numeric id, and finally fall back to the ids of
       the current process. Raises ExtractError if changing the owner
       fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    try:
        group_id = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            group_id = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            group_id = os.getgid()

    try:
        user_id = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            user_id = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            user_id = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself rather than what it points to.
            os.lchown(targetpath, user_id, group_id)
        elif sys.platform != "os2emx":
            os.chown(targetpath, user_id, group_id)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002257
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to targetpath.

       Does nothing when the os module has no chmod(). Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002266
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

       Does nothing when the os module has no utime(), or for
       directories on win32. Raises ExtractError if the times cannot
       be changed.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if not hasattr(os, 'utime') or (
            sys.platform == "win32" and tarinfo.isdir()):
        return

    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002280
2281 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, or
       None if there is no more available.

       The archive must be open in mode "r" or "a". A pending
       self.firstmember is handed out first. Otherwise a header is read
       at self.offset; on a HeaderError the block is skipped when
       self.ignore_zeros is set, ReadError is raised when the error
       occurs at offset 0, and None is returned in any other case.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except HeaderError as e:
            if not self.ignore_zeros:
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
            # Skip the bad block and try the one that follows it.
            self._dbg(2, "0x%X: %s" % (self.offset, e))
            self.offset += BLOCKSIZE
            continue

        if tarinfo is None:
            return None
        self.members.append(tarinfo)
        return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002314
2315 #--------------------------------------------------------------------------
2316 # Little helper methods:
2317
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name, searching from bottom to top.

       If tarinfo is given, only members located before it are
       considered. Returns the matching member, or None when no member
       has that name.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    stop = len(members) if tarinfo is None else members.index(tarinfo)

    for candidate in reversed(members[:stop]):
        if name == candidate.name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002333
def _load(self):
    """Read through the entire archive file and look for readable
       members.

       Calls self.next() until it returns None, then sets
       self._loaded to True.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2343
def _check(self, mode=None):
    """Check that the TarFile is still open and, when mode is given,
       that self.mode is one of the characters in mode.

       Raises IOError if either check fails.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None or self.mode in mode:
        return
    raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002352
def __iter__(self):
    """Provide an iterator object.

       Once all members are loaded this is a plain iterator over
       self.members; before that a TarIter is returned.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2360
def _dbg(self, level, msg):
    """Write msg to sys.stderr if level does not exceed self.debug.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002366# class TarFile
2367
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object over tarfile, starting at the
           first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            # Everything is already in the member list, so continue
            # from our own position in it.
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2403
2404# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.

       A section covers the half-open range
       [offset, offset + size).
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        """Return True if offset lies inside this section.
        """
        start = self.offset
        return start <= offset < start + self.size
2413
class _data(_section):
    """Represent a data section in a sparse file.

       In addition to the offset and size kept by _section, a data
       section stores realpos.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2420
class _hole(_section):
    """Represent a hole section in a sparse file.

       Adds nothing to _section; the class only marks the section
       as a hole.
    """
2425
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts at the position of the previous hit (self.idx)
       and wraps around, instead of always scanning from the start.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item, starting at self.idx and wrapping
           around, for which ``offset in item`` is true.

           Returns None when no item contains offset, including when
           the buffer is empty. On a hit self.idx is moved to the
           position of the returned item.
        """
        if not self:
            # Nothing to search; indexing below would raise IndexError.
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2446
2447#---------------------------------------------
2448# zipfile compatible TarFile class
2449#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED).

           Raises ValueError for any other compression constant. When
           reading, every member is given the zipfile-style attributes
           filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return a list with the names of all regular-file members.
        """
        # A real list (not a lazy map object) so that callers can index
        # it and take its length, as with ZipFile.namelist().
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return a list of the members whose type is in REGULAR_TYPES.
        """
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a listing of the archive via TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None; no integrity check is performed.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive, optionally as arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member described by zinfo with the contents bytes.

           zinfo's filename, file_size and date_time are copied to
           name, size and mtime before it is added. A str payload is
           encoded as UTF-8; the stored size is still taken from
           zinfo.file_size.
        """
        from io import BytesIO
        import calendar
        if isinstance(bytes, str):
            bytes = bytes.encode("utf-8")
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        # The payload is binary data, so it needs a bytes buffer.
        self.tarfile.addfile(zinfo, BytesIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2493#class TarFileCompat
2494
2495#--------------------
2496# exported functions
2497#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again right away; a TarError
       raised while doing so means it cannot be handled.
    """
    try:
        archive = open(name)
        archive.close()
    except TarError:
        return False
    else:
        return True
2508
# Keep the builtin open() reachable as bltn_open, then make the
# module-level name open refer to TarFile.open. The right-hand side is
# evaluated before either name is rebound.
bltn_open, open = open, TarFile.open