blob: 4864d97d0d1e2e096bac394fd71eb7ecbc4fa44c [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.  The "$...$" strings look like version-control keyword
# placeholders (presumably expanded by the VCS on checkout -- TODO confirm).
__version__ = "$Revision$"
# $Source$

# Human-readable release number of the module.
version     = "0.9.0"
# "\xe4" is the escaped form of the a-umlaut in the author's name.
__author__  = "Lars Gust\xe4bel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gust\xe4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040
41#---------
42# Imports
43#---------
44import sys
45import os
46import shutil
47import stat
48import errno
49import time
50import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000051import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000052import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000053
# Refuse to load on classic MacOS: importing the module there raises
# ImportError instead of misbehaving later.
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError("tarfile does not work for platform==mac")
Jack Jansencfc49022003-03-07 13:37:32 +000060
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000061try:
62 import grp, pwd
63except ImportError:
64 grp = pwd = None
65
# from tarfile import *
# Only these four names are exported by a star-import; every other name in
# this module has to be imported explicitly.
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
# Keep a private reference to the built-in open(), since 'open' is
# TarFile.open in this module.
try:
    # Python 3 name of the module that holds the built-in functions.
    from builtins import open as _open
except ImportError:
    # Older interpreters only provide the module under its former name.
    from __builtin__ import open as _open
70
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000071#---------------------------------------------------------
72# tar constants
73#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic strings span the 6-byte magic field plus the 2-byte version
# field of a header, so each of them must be exactly 8 bytes long:
# GNU tar writes "ustar" + two spaces + NUL, POSIX writes "ustar" + NUL + "00".
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one-byte type flag of a header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file (NUL type flag)
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive formats; DEFAULT_FORMAT is the default of itn()'s format argument.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000106
107#---------------------------------------------------------
108# tarfile constants
109#---------------------------------------------------------
# File types that tarfile supports:
# (the pax/Solaris header types XHDTYPE, XGLTYPE and SOLARIS_XHDTYPE are
# not part of this tuple)
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
# Maps the field name to the numeric type of its value (presumably the
# callable used to convert the header's text -- verify against the parser).
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
139
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000140#---------------------------------------------------------
141# Bits used in the mode field, values in octal.
142#---------------------------------------------------------
# File type bits.  Note that S_IFLNK contains all bits of S_IFREG and
# S_IFBLK contains the bits of S_IFDIR and S_IFCHR, so a "mode & bit == bit"
# test has to try the wider masks first (see filemode_table).
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

# Special permission bits.
TSUID   = 0o4000          # set UID on execution
TSGID   = 0o2000          # set GID on execution
TSVTX   = 0o1000          # reserved (shown as "t"/"T" by filemode())

# Permission bits for owner, group and other.
TUREAD  = 0o400           # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC  = 0o100           # execute/search by owner
TGREAD  = 0o040           # read by group
TGWRITE = 0o020           # write by group
TGEXEC  = 0o010           # execute/search by group
TOREAD  = 0o004           # read by other
TOWRITE = 0o002           # write by other
TOEXEC  = 0o001           # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000163
164#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000165# initialization
166#---------------------------------------------------------
# Module-wide default text encoding, taken from the file system encoding
# reported by the interpreter.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    # The interpreter could not determine one; fall back to plain ASCII.
    ENCODING = "ascii"
Guido van Rossumd8faa362007-04-27 19:54:29 +0000170
171#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000172# Some useful functions
173#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000174
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

       The encoded text is cut off after length bytes; a shorter value
       is padded on the right with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000180
def nts(s, encoding, errors):
    """Decode a null-terminated bytes field to a string.

       Everything from the first NUL byte onwards is dropped; a field
       without any NUL byte is decoded as a whole.
    """
    text, _sep, _rest = s.partition(b"\0")
    return text.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000188
def nti(s):
    """Convert a number field to a python number.

       s is the raw bytes of the field.  A field that starts with the
       byte 0o200 holds a big-endian base-256 number in its remaining
       bytes (GNU tar extension); every other field is read as a
       null-terminated string of octal digits, an empty string counting
       as zero.

       Raises HeaderError if an octal field cannot be parsed.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    # Compare a one-byte slice rather than s[0]: indexing bytes yields an
    # int, which never equals a one-character string.
    if s[0:1] != b"\200":
        try:
            n = int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise HeaderError("invalid header")
    else:
        n = 0
        # bytearray iteration always yields integers.
        for byte in bytearray(s[1:]):
            n = (n << 8) | byte
    return n
205
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

       n      -- the number to store
       digits -- total width of the field in bytes
       format -- archive format; values that do not fit the octal
                 encoding are only accepted for GNU_FORMAT

       Returns a bytes object of digits bytes.  Raises ValueError if n
       cannot be represented in the field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    else:
        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise an error if n does not fit a C long.
            n = struct.unpack("L", struct.pack("l", n))[0]

        # Build the field in a mutable bytearray (bytes objects have no
        # insert()), least significant byte first, then prepend the marker.
        field = bytearray()
        for i in range(digits - 1):
            field.insert(0, n & 0o377)
            n >>= 8
        field.insert(0, 0o200)
        s = bytes(field)
    return s
232
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) of a header block.

       All bytes of the 512-byte header are summed up, except for the
       eight bytes of the chksum field (offsets 148 to 155), which count
       as if they were spaces: 8 * 32 == 256.  Some tars (Sun and NeXT,
       according to the GNU tar sources) sum signed chars, which gives a
       different result when bytes with the high bit set are present, so
       both variants are calculated.
    """
    before_chksum = buf[:148]
    after_chksum = buf[156:512]

    def total(code):
        # code is the struct format character: "B" unsigned, "b" signed.
        values = struct.unpack("148" + code, before_chksum)
        values += struct.unpack("356" + code, after_chksum)
        return 256 + sum(values)

    unsigned_chksum = total("B")
    signed_chksum = total("b")
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000245
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

       With length None everything up to EOF is copied; with length 0
       nothing is read at all.  Otherwise the data is moved in chunks of
       at most 16 KiB and IOError is raised when src ends early.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read means premature EOF.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_chunks, rest = divmod(length, BUFSIZE)
    for _ in range(full_chunks):
        transfer(BUFSIZE)
    if rest != 0:
        transfer(rest)
    return
270
# Lookup table for filemode().  It holds one inner tuple per character of
# the resulting string; each inner tuple lists (bit mask, character)
# candidates, and the first candidate whose bits are all set in the mode
# wins.  The order inside a group therefore matters: masks that contain
# the bits of another mask (S_IFLNK vs. S_IFREG, TUEXEC|TSUID vs. TSUID)
# have to come first.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner: read, write, execute/set-uid
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group: read, write, execute/set-gid
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other: read, write, execute/TSVTX
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000297
def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       Each group of filemode_table contributes exactly one character:
       that of its first entry whose bits are all set in mode, or "-"
       if no entry of the group matches.
    """
    symbols = []
    for candidates in filemode_table:
        matching = (char for bit, char in candidates if mode & bit == bit)
        symbols.append(next(matching, "-"))
    return "".join(symbols)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000312
# normpath() normalizes a path and makes sure the result uses "/" as its
# separator, whatever the platform's native separator is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and turn the native separators into "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
317
class TarError(Exception):
    """Root of the exception hierarchy of this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Exception for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000336
337#---------------------------
338# internal stream interface
339#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.

       The file is accessed through a raw OS-level file descriptor
       (os.open/os.read/os.write).
    """

    def __init__(self, name, mode):
        """Open the file name; mode is "r" (read) or "w" (create/truncate).
           Any other mode raises KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            # Only defined on platforms that distinguish text from
            # binary files.
            mode |= os.O_BINARY
        # Pass the permission bits explicitly: os.open() defaults to
        # 0o777, which would make a newly created archive executable.
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the bytes s to the file descriptor."""
        os.write(self.fd, s)
363
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; it is opened with _LowLevelFile if
                       fileobj is None
           mode     -- "r" for reading, anything else selects writing
           comptype -- "tar" (no compression), "gz", "bz2" or "*" to
                       detect the compression of a stream to read
           fileobj  -- stream-like object to use, or None
           bufsize  -- block size used to access the stream

           Raises CompressionError if the module needed for comptype is
           not available.  A file opened by the constructor itself is
           closed again if the construction fails.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        try:
            if comptype == '*':
                # Enable transparent compression detection for the
                # stream interface
                fileobj = _StreamProxy(fileobj)
                comptype = fileobj.getcomptype()

            self.name     = name or ""
            self.mode     = mode
            self.comptype = comptype
            self.fileobj  = fileobj
            self.bufsize  = bufsize
            self.buf      = b""
            self.pos      = 0
            self.closed   = False
            # Exceptions of the decompressor that mean "corrupt data".
            self._decompress_errors = (IOError,)

            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                # zlib reports corrupt data with its own exception type.
                self._decompress_errors = (IOError, zlib.error)
                # crc32() only accepts bytes.
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except BaseException:
            # Do not leak the file that was opened above.
            if not self._extfileobj:
                fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and trailer
        # are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        # magic, method (deflate), flags (FNAME), mtime, xfl, os
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Mark the stream closed first, so that a failing flush is not
        # attempted a second time from __del__.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            # Release a file opened by this object even if writing the
            # remaining data failed.
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Raises ReadError if the stream does not start with the gzip
           magic and CompressionError for an unknown compression method.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        # skip mtime, extra flags and os
        self.__read(6)

        if flag & 4:
            # FEXTRA: the extra field belongs to the uncompressed gzip
            # header, so it has to be skipped with the raw __read().
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: null-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: null-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 16-bit header checksum
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seeking forward means reading and discarding the data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes, so they need a bytes separator.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self._decompress_errors:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
589
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Read the first block of fileobj and keep it for inspection."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead, regardless of size.

           The first call rebinds self.read to the read() method of the
           wrapped file object, so all later reads bypass the proxy.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar", depending on the magic bytes
           found at the start of the stream.
        """
        if self.buf.startswith(b"\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh" and a level digit "1".."9",
        # followed by the magic of the first block.  Do not insist on
        # level 9, or archives written with another level go undetected.
        if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
613
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched from fileobj per read.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj; mode "r" decompresses, any other mode compresses."""
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)start at position 0 with a fresh (de)compressor object."""
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = b""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of decompressed data.

           Fewer bytes are returned once the compressed stream or the
           underlying file has ended.
        """
        x = len(self.buf)
        while x < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # The file is exhausted.  Stop here, otherwise a truncated
                # stream would keep this loop spinning forever, because
                # decompressing nothing yields nothing.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # The end of the compressed stream was reached before.
                break
            self.buf += data
            x += len(data)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to pos by reading; seeking backwards restarts the
           decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the position in the uncompressed data."""
        return self.pos

    def write(self, data):
        """Compress data and write the result to the file object."""
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush pending compressed data in write mode and close
           the file object.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
675
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000676#------------------------
677# Extraction file object
678#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """fileobj -- the file object that contains the data
           offset  -- position in fileobj where the data starts
           size    -- length of the data as seen through this wrapper
           sparse  -- None for plain data, otherwise an object whose
                      find(position) returns the section that covers
                      position
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def seekable(self):
        """Tell whether the wrapped file object supports seeking."""
        if not hasattr(self.fileobj, "seekable"):
            # XXX gzip.GzipFile and bz2.BZ2File
            return True
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.

           At most size bytes are returned; with size None or negative
           everything up to the end of the wrapped part is returned.
           Reading never goes past that end.
        """
        # Never let the amount become negative: a negative size handed
        # to fileobj.read() would return everything up to the end of
        # the underlying file, i.e. data that is not part of this file.
        remaining = max(self.size - self.position, 0)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = b""
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data += buf
        return data

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)

        if section is None:
            return b""

        # Do not read beyond the end of the current section.
        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            # Data section: its bytes are stored at realpos.
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            # Any other section is a hole and reads as NUL bytes.
            self.position += size
            return NUL * size
#class _FileInFile
759
760
class ExFileObject(object):
    """File-like object for reading an archive member.

    Is returned by TarFile.extractfile().  All data comes from a
    _FileInFile wrapper around the archive's file object.  readline()
    reads ahead in blocksize chunks and keeps the surplus in
    self.buffer; read() drains that buffer before reading any further.
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Set up a reader for the member described by tarinfo.

        tarfile supplies the underlying file object; tarinfo supplies
        the data offset, the size, the name and, if the attribute
        exists, the sparse map of the member.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0       # logical read position within the member
        self.buffer = b""       # read-ahead data left over by readline()

    def readable(self):
        """Return True; the object always supports reading."""
        return True

    def writable(self):
        """Return False; the object never supports writing."""
        return False

    def seekable(self):
        """Return whether the underlying file object is seekable."""
        return self.fileobj.seekable()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
        present, None or negative, read all data until EOF is reached.

        Raises ValueError if the file object has been closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # A negative size means "everything", as with ordinary file
        # objects.  Without this, buffer[:size] would silently drop
        # bytes from the end of the read-ahead buffer and a negative
        # count would be handed to the underlying reader.
        if size is not None and size < 0:
            size = None

        buf = b""
        if self.buffer:
            # Serve buffered read-ahead data first.
            if size is None:
                buf = self.buffer
                self.buffer = b""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    # XXX TextIOWrapper uses the read1() method.
    read1 = read

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
        and non-negative, return a bytes object with at most that
        size, which may be an incomplete line. None or a negative
        size means no limit.

        Raises ValueError if the file object has been closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # pos is the index just past the first newline, or 0 if the
        # buffer does not contain one yet.
        pos = self.buffer.find(b"\n") + 1
        if pos == 0:
            # no newline found: read ahead until one shows up or EOF.
            while True:
                buf = self.fileobj.read(self.blocksize)
                self.buffer += buf
                if not buf or b"\n" in buf:
                    pos = self.buffer.find(b"\n") + 1
                    if pos == 0:
                        # EOF without newline: return what is left.
                        pos = len(self.buffer)
                    break

        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

        Raises ValueError if the file object has been closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to the range 0..self.size.
        Raises ValueError if the file object has been closed or whence
        is not one of os.SEEK_SET, os.SEEK_CUR or os.SEEK_END.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data belongs to the old position; discard it.
        self.buffer = b""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
898
899#------------------
900# Exported Classes
901#------------------
class TarInfo(object):
    """Informational class which holds the details about an
    archive member given by a tar header block.
    TarInfo objects are returned by TarFile.getmember(),
    TarFile.getmembers() and TarFile.gettarinfo() and are
    usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

        name and linkname are passed through normpath(), mode is
        masked with 0o7777, and the name of a directory member always
        ends with a slash.
        """
        info = {
            "name":     normpath(self.name),
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": normpath(self.linkname) if self.linkname else "",
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

        format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
        PAX_FORMAT); any other value raises ValueError. encoding and
        errors are used for the ustar and GNU flavours only.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        Raises ValueError if the linkname is longer than LENGTH_LINK
        or if a long name cannot be split into prefix and name.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Over-long link names and names are emitted as preceding
        GNUTYPE_LONGLINK / GNUTYPE_LONGNAME sequences.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
        and a name part.

        The split happens at the last slash within the first
        LENGTH_PREFIX + 1 characters. Raises ValueError if no such
        slash exists or the remaining name is still too long.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        prefix = prefix[:-1]        # drop the separating slash

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field placeholder. It has to be 8 bytes wide:
            # frombuf() reads the checksum from buf[148:156] and the
            # splice below relies on the same layout.
            b" " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum field with six
        # octal digits and a NUL; the eighth byte stays a space.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
        up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
        for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type):
        """Return a POSIX.1-2001 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        records = b""
        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length counts its own decimal digits, so
            # iterate until the total stops changing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raises HeaderError if buf does not have BLOCKSIZE bytes,
        consists of NUL bytes only, or has a bad checksum.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        obj = cls()
        obj.buf = buf
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
        tarfile.

        Returns None if nothing more can be read from the archive.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        if not buf:
            return
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
        the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
        will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
        or longlink member.

        Raises HeaderError if no header follows the long name data.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        member = self.fromtarfile(tarfile)
        if member is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        member.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            member.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            member.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return member

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

        Builds the list of data and hole sections in self.sparse and
        replaces self.size with the original size stored in the header.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        lastpos = 0
        realpos = 0
        # There are 4 possible sparse structs in the
        # first header.
        for i in range(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = bool(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = bool(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2001.

        Raises HeaderError if a record announces a length of zero or
        if an extended header is not followed by another header.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        # buf holds bytes, so the pattern has to be a bytes pattern.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero-length record would never advance pos and
                # make this loop spin forever.
                raise HeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        member = self.fromtarfile(tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            if member is None:
                raise HeaderError("missing subsequent header")

            # Patch the TarInfo object with the extended header info.
            member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            member.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = member.offset_data
                if member.isreg() or member.type not in SUPPORTED_TYPES:
                    offset += member._block(member.size)
                tarfile.offset = offset

        return member

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
        pax extended or global header.

        Only keywords listed in PAX_FIELDS are applied. Values of
        PAX_NUMBER_FIELDS keywords are converted and fall back to 0
        if the conversion raises ValueError. encoding and errors are
        accepted but not used here.
        """
        for keyword, value in pax_headers.items():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
        e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1454
1455class TarFile(object):
1456 """The TarFile Class provides an interface to tar archives.
1457 """
1458
1459 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1460
1461 dereference = False # If true, add content of linked file to the
1462 # tar file, else the link.
1463
1464 ignore_zeros = False # If true, skips empty or invalid blocks and
1465 # continues processing.
1466
1467 errorlevel = 0 # If 0, fatal errors only appear in debug
1468 # messages (if debug >= 0). If > 0, errors
1469 # are passed to the caller as exceptions.
1470
Guido van Rossumd8faa362007-04-27 19:54:29 +00001471 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001472
Guido van Rossume7ba4952007-06-06 23:52:48 +00001473 encoding = ENCODING # Encoding for 8-bit character strings.
1474
1475 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001476
Guido van Rossumd8faa362007-04-27 19:54:29 +00001477 tarinfo = TarInfo # The default TarInfo class to use.
1478
1479 fileobject = ExFileObject # The default ExFileObject class to use.
1480
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'; any other value raises ValueError.
       If `fileobj' is given, it is used for reading or writing data. If it
       has a `mode' attribute, that value replaces the low-level file mode.
       `fileobj' is not closed, when TarFile is closed.
       `format', `tarinfo', `dereference', `ignore_zeros', `encoding',
       `debug' and `errorlevel' are stored on the instance only when they
       are not None. `errors' defaults to "replace" when `mode' is 'r' and
       to "strict" otherwise. `pax_headers' is kept only if the format is
       PAX_FORMAT.
       If the archive cannot be scanned (or the global pax header cannot be
       written), a file that was opened here is closed again before the
       exception propagates.
    """
    # An exact comparison is required: a substring test against "raw"
    # would also accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding

    # The default error handler depends on the mode that was requested,
    # not on the mode finally used.
    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "replace"
    else:
        self.errors = "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = 0         # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                if self.next() is None:
                    if self.offset > 0:
                        self.fileobj.seek(self.fileobj.tell() - BLOCKSIZE)
                    break

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except BaseException:
        # Do not leak a file that was opened by this constructor.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
1571
def _getposix(self):
    # Getter of the deprecated `posix' property: true exactly when the
    # archive format is USTAR_FORMAT.
    return self.format == USTAR_FORMAT
def _setposix(self, value):
    # Setter of the deprecated `posix' property: warns, then maps a true
    # value to USTAR_FORMAT and a false one to GNU_FORMAT.
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning)
    self.format = USTAR_FORMAT if value else GNU_FORMAT
posix = property(_getposix, _setposix)
1582
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001583 #--------------------------------------------------------------------------
1584 # Below are the classmethods which act as alternate constructors to the
1585 # TarFile class. The open() method is the only one that is needed for
1586 # public use; it is the "super"-constructor and is able to select an
1587 # adequate "sub"-constructor for a particular compression using the mapping
1588 # from OPEN_METH.
1589 #
1590 # This concept allows one to subclass TarFile without losing the comfort of
1591 # the super-constructor. A sub-constructor is registered and made available
1592 # by adding it to the mapping in OPEN_METH.
1593
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       ValueError is raised if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener accepts the
       file in transparent read mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so that the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison; a substring test would let "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except BaseException:
            # The stream is owned by this method; do not leak it.
            stream.close()
            raise
        t._extfileobj = False
        return t

    # Exact comparison; a substring test would also match "".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001666
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.
       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. All arguments are passed on to the class constructor.
    """
    # Exact comparison; a substring test against "raw" accepts "".
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001674
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.
       ValueError is raised for a mode other than 'r' or 'w',
       CompressionError if the gzip module is unusable and ReadError if
       opening the archive fails with an IOError. A file opened here is
       closed again if the archive cannot be opened.
    """
    # Exact comparison; a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj),
            **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except BaseException:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1700
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.
       ValueError is raised for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing and ReadError if
       opening the archive fails with an IOError. A BZ2File created here
       is closed again if the archive cannot be opened.
    """
    # Exact comparison; a substring test against "rw" accepts "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    created_here = fileobj is None
    if created_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        if created_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except BaseException:
        if created_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1725
# All *open() methods are registered here.
# Each key is a compression type as it appears in a mode string; each value
# is the name of the classmethod that open() looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1732
1733 #--------------------------------------------------------------------------
1734 # The public methods which TarFile provides:
1735
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up to
       a multiple of RECORDSIZE. Calling close() on an already closed
       TarFile does nothing. The underlying file object is closed unless
       it was supplied by the caller; this also happens if writing the
       trailer fails.
    """
    if self.closed:
        return

    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file even when the trailer could not be written,
        # and never attempt to write the trailer a second time.
        self.closed = True
        if not self._extfileobj:
            self.fileobj.close()
1755
def getmember(self, name):
    """Return the TarInfo object for member `name' as found by
       _getmember(). KeyError is raised if the archive has no such
       member. If a member occurs more than once in the archive, its last
       occurrence is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001766
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The complete member list is only known after the whole archive
    # has been scanned.
    self._load()
    return self.members
1776
def getnames(self):
    """Return the names of the archive's members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001782
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing either the file `name' or the
       file object `fileobj' (examined with os.fstat on its file
       descriptor). Attributes of the result may be changed before it is
       passed to addfile(). If given, `arcname' specifies an alternative
       name for the file in the archive. None is returned when the file is
       of a type that is not handled here.
    """
    self._check("aw")

    # A file object's own name replaces `name'.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: normalise the path, drop a drive prefix and
    # strip leading slashes so that absolute paths become relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(normpath(arcname))[1]
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Use fstat for file objects; otherwise lstat unless symlinks are to
    # be followed or the platform has no lstat.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        # A regular file with several links whose inode was already
        # archived under another name is stored as a hardlink.
        archived_elsewhere = (not self.dereference
                              and statres.st_nlink > 1
                              and inode in self.inodes
                              and arcname != self.inodes[inode])
        if archived_elsewhere:
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if stat.S_ISREG(stmd) else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (ftype in (CHRTYPE, BLKTYPE)
            and hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1881
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            print(filemode(member.mode), end=' ')
            owner = member.uname or member.uid
            group = member.gname or member.gid
            print("%s/%s" % (owner, group), end=' ')
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            stamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001910
def add(self, name, arcname=None, recursive=True, exclude=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded.
       The archive file itself and files of unsupported type are skipped.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None and exclude(name):
        self._dbg(2, "tarfile: Excluded %r" % name)
        return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f), recursive, exclude)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing the member failed.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude)

    else:
        self.addfile(tarinfo)
1967
def addfile(self, tarinfo, fileobj=None):
    """Append the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and written after the
       header, padded with NUL bytes to a whole number of blocks.
       TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy; the caller's object is neither stored nor changed.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        blocks, remainder = divmod(member.size, BLOCKSIZE)
        if remainder > 0:
            # Pad the last, partially filled block.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001993
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
       An ExtractError raised while fixing up a directory is re-raised
       only if errorlevel is greater than 1; otherwise it is reported
       through the debug channel.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), 0o700)
            except EnvironmentError:
                # Best effort: failures here are ignored on purpose
                # (e.g. the directory is already there).
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories. A key function is used because list.sort()
    # takes no comparison function and cmp() does not exist in Python 3.
    directories.sort(key=lambda a: a.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate name: overwriting `path' would make every
        # following directory resolve below the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2034
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. `member' may be a filename or a TarInfo
       object. A different directory can be given with `path'.
       An EnvironmentError is re-raised only if errorlevel is greater
       than 0 and an ExtractError only if errorlevel is greater than 1;
       otherwise the error is reported through the debug channel.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination)
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2067
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Regular files, and members whose type is unknown (these are treated
    # as regular files), are wrapped directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
2106
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.
    """
    # Build the destination pathname: drop one trailing slash and let
    # os.path.normpath() produce the platform specific form.
    if targetpath.endswith("/"):
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member's type; anything
    # left over is written as a regular file.
    if tarinfo.isreg():
        maker = self.makefile
    elif tarinfo.isdir():
        maker = self.makedir
    elif tarinfo.isfifo():
        maker = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        maker = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        maker = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        maker = self.makeunknown
    else:
        maker = self.makefile
    maker(tarinfo, targetpath)

    # Mode and mtime are not applied to symbolic links.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2147
2148 #--------------------------------------------------------------------------
2149 # Below are the different file methods. They are called via
2150 # _extract_member() when extract() is called. They can be replaced in a
2151 # subclass to implement other functionality.
2152
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath. An already existing
       directory is not an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2161
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member described by tarinfo. Both the member's file object
       and the target file are closed even if copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2170
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath. The member is written like a regular file and a
       debug message is emitted afterwards.
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, " \
              "extracted as regular file." % tarinfo.type
    self._dbg(1, message)
2178
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath. ExtractError is raised if the
       os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002186
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath. ExtractError
       is raised if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode bits with the device kind flag.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2201
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.
    """
    source = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(source, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # An attribute was missing (e.g. the os module has no symlink()
        # or link()): fall back to copying the referenced file.
        if tarinfo.issym():
            # Resolve the target relative to the link's own directory.
            member_dir = os.path.dirname(tarinfo.name)
            source = normpath(os.path.join(member_dir, source))

        try:
            # First choice: extract the referenced archive member again.
            self._extract_member(self.getmember(source), targetpath)
        except (EnvironmentError, KeyError):
            # Second choice: copy the file from the file system.
            source = os.path.normpath(source)
            try:
                shutil.copy2(source, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002228
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo.

       Nothing happens unless pwd is available and the effective uid
       is 0.  Group and user are looked up by name first, then by
       numeric id, and finally fall back to the ids of the current
       process.  Raises ExtractError if the ownership change fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself.
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002256
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo.mode to targetpath.

       Does nothing when the os module has no chmod().  Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002265
def utime(self, tarinfo, targetpath):
    """Stamp targetpath with the modification time held in tarinfo.

       tarinfo.mtime is used for both the access and the modification
       time.  Does nothing when os.utime() is unavailable.  Raises
       ExtractError if the timestamps cannot be changed.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories, so those are skipped on win32.
    if not hasattr(os, 'utime') or (sys.platform == "win32"
                                    and tarinfo.isdir()):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002279
2280 #--------------------------------------------------------------------------
def next(self):
    """Return the next archive member as a TarInfo object, or None
       once no further member is available.  Requires the TarFile to
       be open in mode "r" or "a".
    """
    self._check("ra")

    # A member that was read ahead is handed out first.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            member = self.tarinfo.fromtarfile(self)
        except HeaderError as err:
            if not self.ignore_zeros:
                # A bad header at the very start is reported as an
                # error; anywhere else it simply ends the iteration.
                if self.offset == 0:
                    raise ReadError(str(err))
                return None
            # ignore_zeros: skip this block and try the following one.
            self._dbg(2, "0x%X: %s" % (self.offset, err))
            self.offset += BLOCKSIZE
            continue

        if member is None:
            return None
        self.members.append(member)
        return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002313
2314 #--------------------------------------------------------------------------
2315 # Little helper methods:
2316
def _getmember(self, name, tarinfo=None):
    """Search the member list backwards for an entry called name.

       Without tarinfo the whole list is searched; with tarinfo only
       the members located before it are considered.  Returns the
       matching member, or None when there is none.
    """
    # getmembers() ensures that all members have been loaded.
    members = self.getmembers()
    stop = len(members) if tarinfo is None else members.index(tarinfo)

    for candidate in reversed(members[:stop]):
        if name == candidate.name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002332
def _load(self):
    """Walk through the archive with next() until it reports no more
       members, then mark the TarFile as fully loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2342
def _check(self, mode=None):
    """Verify that the TarFile is open and, when mode is given, that
       the TarFile's own mode is one of the characters in mode.

       Raises IOError if either check fails.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002351
def __iter__(self):
    """Return an iterator over the archive members.

       A fully loaded archive is iterated straight from its member
       list; otherwise a TarIter reads the members on demand.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2359
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level is above the TarFile's
       debug setting.
    """
    if self.debug < level:
        return
    print(msg, file=sys.stderr)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002365# class TarFile
2366
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile to iterate over and start at index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """An iterator is its own iterable.
        """
        return self

    def __next__(self):
        """Return the next member.

           While the TarFile is not fully loaded the member comes from
           TarFile.next(); when that is exhausted the TarFile is marked
           as _loaded.  Raises StopIteration at the end.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances it can
            # happen that getmembers() is called during iteration,
            # which would cause TarIter to stop prematurely.  Continue
            # from the member list at our own position instead.
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2402
2403# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a span of size bytes starting
       at offset.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The span is half-open: its end position is not part of it.
        start = self.offset
        return start <= offset < start + self.size
2412
class _data(_section):
    """A section of a sparse file that holds actual data; realpos is
       stored alongside the section's offset and size.
    """
    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2419
class _hole(_section):
    """A section of a sparse file that is a hole.  It adds nothing to
       _section and only serves as a distinct type.
    """
2424
class _ringbuffer(list):
    """List of sections that remembers where the last lookup succeeded.

       find() starts scanning at the position of the previous hit and
       wraps around at the end, which is meant to be faster than
       scanning a regular list from the start on every lookup.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, counted from the last hit, for which
           "offset in item" is true, or None if no item matches.

           A successful lookup moves the starting point for the next
           call to the item found; a failed one leaves it untouched.
           An empty buffer yields None instead of raising IndexError.
        """
        count = len(self)
        if not count:
            # Nothing to search.
            return None

        start = self.idx
        # Visit every item exactly once, wrapping around the end.
        for step in range(count):
            idx = (start + step) % count
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
        # End of File
        return None
2445
2446#---------------------------------------------
2447# zipfile compatible TarFile class
2448#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.

       The wrapped TarFile is available as the tarfile attribute.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzip-compressed
           (TAR_GZIPPED) archive.  Any other compression value raises
           ValueError.

           When reading, every member additionally receives the
           zipfile-style attributes filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return a list with the names of all regular-file members.
        """
        # A real list, as zipfile returns, not a lazy map object.
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return a list of the members whose type is in REGULAR_TYPES.
        """
        # A real list, as zipfile returns, not a lazy filter object.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents by means of TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Present for zipfile compatibility only; always returns None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive, optionally under the
           name arcname.  compress_type is accepted but ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member described by zinfo with the contents bytes.

           zinfo.filename, zinfo.file_size and zinfo.date_time are
           copied to the name, size and mtime attributes of zinfo
           before it is handed to TarFile.addfile().  A str payload is
           encoded as UTF-8 first, as zipfile's writestr() does.
        """
        # The payload needs a binary buffer; io.StringIO cannot hold
        # bytes and would hand text to addfile().
        from io import BytesIO
        import calendar
        data = bytes
        if isinstance(data, str):
            data = data.encode("utf-8")
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, BytesIO(data))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2492#class TarFileCompat
2493
2494#--------------------
2495# exported functions
2496#--------------------
def is_tarfile(name):
    """Tell whether name refers to a tar archive that this module can
       open.

       Returns True if opening (and closing) the archive succeeds and
       False if that raises TarError.  Other exceptions propagate.
       The open() called here is the module-level name, which is bound
       to TarFile.open at the end of this module.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2507
# Keep the previous meaning of open() (presumably the builtin) reachable
# as bltn_open, because the following line rebinds the module-level name
# "open" to TarFile.open.  The order of the two statements matters.
bltn_open = open
open = TarFile.open