blob: 92daa5ae0a7b56969a9064690da425556cb79a54 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Guido van Rossumd8faa362007-04-27 19:54:29 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000052import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000053import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError("tarfile does not work for platform==mac")
61
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000062try:
63 import grp, pwd
64except ImportError:
65 grp = pwd = None
66
67# from tarfile import *
68__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69
Guido van Rossum8f78fe92006-08-24 04:03:53 +000070from __builtin__ import open as _open # Since 'open' is TarFile.open
71
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8 bytes of the magic + version header fields.
GNU_MAGIC = "ustar  \0"         # magic gnu tar string ("ustar", 2 spaces, NUL)
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string ("ustar", NUL, "00")

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings. Each value is the converter applied
# to the field's text.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
S_IFLNK = 0o120000              # symbolic link
S_IFREG = 0o100000              # regular file
S_IFBLK = 0o060000              # block device
S_IFDIR = 0o040000              # directory
S_IFCHR = 0o020000              # character device
S_IFIFO = 0o010000              # fifo

TSUID = 0o4000                  # set UID on execution
TSGID = 0o2000                  # set GID on execution
TSVTX = 0o1000                  # reserved

TUREAD = 0o400                  # read by owner
TUWRITE = 0o200                 # write by owner
TUEXEC = 0o100                  # execute/search by owner
TGREAD = 0o040                  # read by group
TGWRITE = 0o020                 # write by group
TGEXEC = 0o010                  # execute/search by group
TOREAD = 0o004                  # read by other
TOWRITE = 0o002                 # write by other
TOEXEC = 0o001                  # execute/search by other

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Prefer the filesystem encoding; fall back to the interpreter default
# when the platform does not report one.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
172#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000173# Some useful functions
174#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000175
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

       The result is s cut to at most length characters and, when s is
       shorter than length, padded on the right with NUL up to length.
    """
    # A negative repeat count yields an empty string, so no padding is
    # added when s is already length characters or longer.
    return s[:length] + (length - len(s)) * NUL
Thomas Wouters477c8d52006-05-27 19:21:47 +0000180
def nts(s):
    """Convert a null-terminated string field to a python string.

       Returns s up to, but not including, its first null character,
       or s unchanged if it contains none.
    """
    # partition() returns the text before the first separator, or the
    # whole string when the separator is absent.
    return s.partition("\0")[0]
189
def nti(s):
    """Convert a number field to a python number.

       Raises HeaderError if the field is in the octal-digit encoding
       but does not contain a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] != chr(0o200):
        # Octal digits terminated by a null char; an empty field counts as 0.
        try:
            n = int(nts(s) or "0", 8)
        except ValueError:
            raise HeaderError("invalid header")
    else:
        # Marker byte 0o200 followed by a big-endian base-256 number.
        n = 0
        for c in s[1:]:
            n = (n << 8) + ord(c)
    return n
206
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

       digits is the width of the field in characters. Raises ValueError
       if n does not fit the field in the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** (digits - 1):
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the digits-1 payload bytes least significant first, then
    # emit them in big-endian order behind the 0o200 marker byte.
    payload = []
    for _ in range(digits - 1):
        payload.append(chr(n & 0o377))
        n >>= 8
    payload.reverse()
    return chr(0o200) + "".join(payload)
233
def uts(s, encoding, errors):
    """Convert a unicode object to a string.

       s is encoded with encoding using the error handler errors. The
       extra handler name "utf-8" encodes every character that encoding
       cannot represent as its UTF-8 byte sequence instead.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001. Replace untranslatable characters with their
    # UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    # Slow path: encode character by character so that only the
    # offending characters fall back to UTF-8.
    chunks = []
    for c in s:
        try:
            chunks.append(c.encode(encoding, "strict"))
        except UnicodeEncodeError:
            chunks.append(c.encode("utf8"))
    # The chunks are encoded byte strings, so join them with a byte string.
    return b"".join(chunks)
253
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    header = buf[:512]
    # "8x" skips the 8-byte chksum field at offset 148; the constant 256
    # is what that field contributes when filled with spaces (8 * 0x20).
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000266
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src ends before length bytes were copied.
       A length of zero or less copies nothing.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    remaining = length
    # Copy in chunks of at most BUFSIZE; a short read means src ended
    # early, and the incomplete chunk is not written.
    while remaining > 0:
        want = min(BUFSIZE, remaining)
        buf = src.read(want)
        if len(buf) < want:
            raise IOError("end of file reached")
        dst.write(buf)
        remaining -= want
291
# Lookup table for filemode(): one inner tuple per output character,
# in output order. Each inner tuple lists (bits, char) candidates in
# priority order.
filemode_table = (
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000318
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for table in filemode_table:
        # The first entry whose bits are all set in mode supplies the
        # character for this position; "-" if no entry matches.
        for bit, char in table:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            perm.append("-")
    return "".join(perm)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000333
if os.sep != "/":
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
else:
    normpath = os.path.normpath
338
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Exception for invalid headers."""
    pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000357
358#---------------------------
359# internal stream interface
360#---------------------------
361class _LowLevelFile:
362 """Low-level file object. Supports reading and writing.
363 It is used instead of a regular file object for streaming
364 access.
365 """
366
367 def __init__(self, name, mode):
368 mode = {
369 "r": os.O_RDONLY,
370 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
371 }[mode]
372 if hasattr(os, "O_BINARY"):
373 mode |= os.O_BINARY
374 self.fd = os.open(name, mode)
375
376 def close(self):
377 os.close(self.fd)
378
379 def read(self, size):
380 return os.read(self.fd, size)
381
382 def write(self, s):
383 os.write(self.fd, s)
384
385class _Stream:
386 """Class that serves as an adapter between TarFile and
387 a stream-like object. The stream-like object only
388 needs to have a read() or write() method and is accessed
389 blockwise. Use of gzip or bzip2 compression is possible.
390 A stream-like object could be for example: sys.stdin,
391 sys.stdout, a socket, a tape device etc.
392
393 _Stream is intended to be used only internally.
394 """
395
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000396 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000397 """Construct a _Stream object.
398 """
399 self._extfileobj = True
400 if fileobj is None:
401 fileobj = _LowLevelFile(name, mode)
402 self._extfileobj = False
403
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000404 if comptype == '*':
405 # Enable transparent compression detection for the
406 # stream interface
407 fileobj = _StreamProxy(fileobj)
408 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000409
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 self.name = name or ""
411 self.mode = mode
412 self.comptype = comptype
413 self.fileobj = fileobj
414 self.bufsize = bufsize
415 self.buf = ""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000416 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000417 self.closed = False
418
419 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000420 try:
421 import zlib
422 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000423 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000424 self.zlib = zlib
425 self.crc = zlib.crc32("")
426 if mode == "r":
427 self._init_read_gz()
428 else:
429 self._init_write_gz()
430
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000431 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000432 try:
433 import bz2
434 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000435 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000436 if mode == "r":
437 self.dbuf = ""
438 self.cmp = bz2.BZ2Decompressor()
439 else:
440 self.cmp = bz2.BZ2Compressor()
441
442 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000443 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000444 self.close()
445
446 def _init_write_gz(self):
447 """Initialize for writing with gzip compression.
448 """
449 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
450 -self.zlib.MAX_WBITS,
451 self.zlib.DEF_MEM_LEVEL,
452 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000453 timestamp = struct.pack("<L", int(time.time()))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000454 self.__write("\037\213\010\010%s\002\377" % timestamp)
455 if self.name.endswith(".gz"):
456 self.name = self.name[:-3]
457 self.__write(self.name + NUL)
458
459 def write(self, s):
460 """Write string s to the stream.
461 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000462 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000463 self.crc = self.zlib.crc32(s, self.crc)
464 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000465 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000466 s = self.cmp.compress(s)
467 self.__write(s)
468
469 def __write(self, s):
470 """Write string s to the stream if a whole new block
471 is ready to be written.
472 """
473 self.buf += s
474 while len(self.buf) > self.bufsize:
475 self.fileobj.write(self.buf[:self.bufsize])
476 self.buf = self.buf[self.bufsize:]
477
478 def close(self):
479 """Close the _Stream object. No operation should be
480 done on it afterwards.
481 """
482 if self.closed:
483 return
484
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000485 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000486 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000487
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000488 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000489 self.fileobj.write(self.buf)
490 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000491 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000492 # The native zlib crc is an unsigned 32-bit integer, but
493 # the Python wrapper implicitly casts that to a signed C
494 # long. So, on a 32-bit box self.crc may "look negative",
495 # while the same crc on a 64-bit box may "look positive".
496 # To avoid irksome warnings from the `struct` module, force
497 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000498 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
499 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000500
501 if not self._extfileobj:
502 self.fileobj.close()
503
504 self.closed = True
505
506 def _init_read_gz(self):
507 """Initialize for reading a gzip compressed fileobj.
508 """
509 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
510 self.dbuf = ""
511
512 # taken from gzip.GzipFile with some alterations
513 if self.__read(2) != "\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000514 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000515 if self.__read(1) != "\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000516 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000517
518 flag = ord(self.__read(1))
519 self.__read(6)
520
521 if flag & 4:
522 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
523 self.read(xlen)
524 if flag & 8:
525 while True:
526 s = self.__read(1)
527 if not s or s == NUL:
528 break
529 if flag & 16:
530 while True:
531 s = self.__read(1)
532 if not s or s == NUL:
533 break
534 if flag & 2:
535 self.__read(2)
536
537 def tell(self):
538 """Return the stream's file pointer position.
539 """
540 return self.pos
541
542 def seek(self, pos=0):
543 """Set the stream's file pointer to pos. Negative seeking
544 is forbidden.
545 """
546 if pos - self.pos >= 0:
547 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000548 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000549 self.read(self.bufsize)
550 self.read(remainder)
551 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000552 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000553 return self.pos
554
555 def read(self, size=None):
556 """Return the next size number of bytes from the stream.
557 If size is not defined, return all bytes of the stream
558 up to EOF.
559 """
560 if size is None:
561 t = []
562 while True:
563 buf = self._read(self.bufsize)
564 if not buf:
565 break
566 t.append(buf)
567 buf = "".join(t)
568 else:
569 buf = self._read(size)
570 self.pos += len(buf)
571 return buf
572
573 def _read(self, size):
574 """Return size bytes from the stream.
575 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000576 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000577 return self.__read(size)
578
579 c = len(self.dbuf)
580 t = [self.dbuf]
581 while c < size:
582 buf = self.__read(self.bufsize)
583 if not buf:
584 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000585 try:
586 buf = self.cmp.decompress(buf)
587 except IOError:
588 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000589 t.append(buf)
590 c += len(buf)
591 t = "".join(t)
592 self.dbuf = t[size:]
593 return t[:size]
594
595 def __read(self, size):
596 """Return size bytes from stream. If internal buffer is empty,
597 read another block from the stream.
598 """
599 c = len(self.buf)
600 t = [self.buf]
601 while c < size:
602 buf = self.fileobj.read(self.bufsize)
603 if not buf:
604 break
605 t.append(buf)
606 c += len(buf)
607 t = "".join(t)
608 self.buf = t[size:]
609 return t[:size]
610# class _Stream
611
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000612class _StreamProxy(object):
613 """Small proxy class that enables transparent compression
614 detection for the Stream interface (mode 'r|*').
615 """
616
617 def __init__(self, fileobj):
618 self.fileobj = fileobj
619 self.buf = self.fileobj.read(BLOCKSIZE)
620
621 def read(self, size):
622 self.read = self.fileobj.read
623 return self.buf
624
625 def getcomptype(self):
626 if self.buf.startswith("\037\213\010"):
627 return "gz"
628 if self.buf.startswith("BZh91"):
629 return "bz2"
630 return "tar"
631
632 def close(self):
633 self.fileobj.close()
634# class StreamProxy
635
Thomas Wouters477c8d52006-05-27 19:21:47 +0000636class _BZ2Proxy(object):
637 """Small proxy class that enables external file object
638 support for "r:bz2" and "w:bz2" modes. This is actually
639 a workaround for a limitation in bz2 module's BZ2File
640 class which (unlike gzip.GzipFile) has no support for
641 a file object argument.
642 """
643
644 blocksize = 16 * 1024
645
646 def __init__(self, fileobj, mode):
647 self.fileobj = fileobj
648 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000649 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000650 self.init()
651
652 def init(self):
653 import bz2
654 self.pos = 0
655 if self.mode == "r":
656 self.bz2obj = bz2.BZ2Decompressor()
657 self.fileobj.seek(0)
658 self.buf = ""
659 else:
660 self.bz2obj = bz2.BZ2Compressor()
661
662 def read(self, size):
663 b = [self.buf]
664 x = len(self.buf)
665 while x < size:
666 try:
667 raw = self.fileobj.read(self.blocksize)
668 data = self.bz2obj.decompress(raw)
669 b.append(data)
670 except EOFError:
671 break
672 x += len(data)
673 self.buf = "".join(b)
674
675 buf = self.buf[:size]
676 self.buf = self.buf[size:]
677 self.pos += len(buf)
678 return buf
679
680 def seek(self, pos):
681 if pos < self.pos:
682 self.init()
683 self.read(pos - self.pos)
684
685 def tell(self):
686 return self.pos
687
688 def write(self, data):
689 self.pos += len(data)
690 raw = self.bz2obj.compress(data)
691 self.fileobj.write(raw)
692
693 def close(self):
694 if self.mode == "w":
695 raw = self.bz2obj.flush()
696 self.fileobj.write(raw)
697 self.fileobj.close()
698# class _BZ2Proxy
699
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000700#------------------------
701# Extraction file object
702#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000703class _FileInFile(object):
704 """A thin wrapper around an existing file object that
705 provides a part of its data as an individual file
706 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707 """
708
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000709 def __init__(self, fileobj, offset, size, sparse=None):
710 self.fileobj = fileobj
711 self.offset = offset
712 self.size = size
713 self.sparse = sparse
714 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000715
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000716 def tell(self):
717 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000718 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000719 return self.position
720
721 def seek(self, position):
722 """Seek to a position in the file.
723 """
724 self.position = position
725
726 def read(self, size=None):
727 """Read data from the file.
728 """
729 if size is None:
730 size = self.size - self.position
731 else:
732 size = min(size, self.size - self.position)
733
734 if self.sparse is None:
735 return self.readnormal(size)
736 else:
737 return self.readsparse(size)
738
739 def readnormal(self, size):
740 """Read operation for regular files.
741 """
742 self.fileobj.seek(self.offset + self.position)
743 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000744 return self.fileobj.read(size)
745
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000746 def readsparse(self, size):
747 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000748 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000749 data = []
750 while size > 0:
751 buf = self.readsparsesection(size)
752 if not buf:
753 break
754 size -= len(buf)
755 data.append(buf)
756 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000757
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000758 def readsparsesection(self, size):
759 """Read a single section of a sparse file.
760 """
761 section = self.sparse.find(self.position)
762
763 if section is None:
764 return ""
765
766 size = min(size, section.offset + section.size - self.position)
767
768 if isinstance(section, _data):
769 realpos = section.realpos + self.position - section.offset
770 self.fileobj.seek(self.offset + realpos)
771 self.position += size
772 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000773 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000774 self.position += size
775 return NUL * size
776#class _FileInFile
777
778
779class ExFileObject(object):
780 """File-like object for reading an archive member.
781 Is returned by TarFile.extractfile().
782 """
783 blocksize = 1024
784
785 def __init__(self, tarfile, tarinfo):
786 self.fileobj = _FileInFile(tarfile.fileobj,
787 tarinfo.offset_data,
788 tarinfo.size,
789 getattr(tarinfo, "sparse", None))
790 self.name = tarinfo.name
791 self.mode = "r"
792 self.closed = False
793 self.size = tarinfo.size
794
795 self.position = 0
796 self.buffer = ""
797
798 def read(self, size=None):
799 """Read at most size bytes from the file. If size is not
800 present or None, read all data until EOF is reached.
801 """
802 if self.closed:
803 raise ValueError("I/O operation on closed file")
804
805 buf = ""
806 if self.buffer:
807 if size is None:
808 buf = self.buffer
809 self.buffer = ""
810 else:
811 buf = self.buffer[:size]
812 self.buffer = self.buffer[size:]
813
814 if size is None:
815 buf += self.fileobj.read()
816 else:
817 buf += self.fileobj.read(size - len(buf))
818
819 self.position += len(buf)
820 return buf
821
822 def readline(self, size=-1):
823 """Read one entire line from the file. If size is present
824 and non-negative, return a string with at most that
825 size, which may be an incomplete line.
826 """
827 if self.closed:
828 raise ValueError("I/O operation on closed file")
829
830 if "\n" in self.buffer:
831 pos = self.buffer.find("\n") + 1
832 else:
833 buffers = [self.buffer]
834 while True:
835 buf = self.fileobj.read(self.blocksize)
836 buffers.append(buf)
837 if not buf or "\n" in buf:
838 self.buffer = "".join(buffers)
839 pos = self.buffer.find("\n") + 1
840 if pos == 0:
841 # no newline found.
842 pos = len(self.buffer)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000843 break
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000844
845 if size != -1:
846 pos = min(size, pos)
847
848 buf = self.buffer[:pos]
849 self.buffer = self.buffer[pos:]
850 self.position += len(buf)
851 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000852
853 def readlines(self):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000854 """Return a list with all remaining lines.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000855 """
856 result = []
857 while True:
858 line = self.readline()
859 if not line: break
860 result.append(line)
861 return result
862
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000863 def tell(self):
864 """Return the current file position.
865 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000866 if self.closed:
867 raise ValueError("I/O operation on closed file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000868
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000869 return self.position
870
def seek(self, pos, whence=os.SEEK_SET):
    """Move the read position of the member.

    pos is interpreted relative to the start (os.SEEK_SET), the
    current position (os.SEEK_CUR) or the end (os.SEEK_END) of the
    member. The resulting position is clamped so that it never leaves
    the range 0..self.size. Any buffered data is discarded and the
    underlying file object is repositioned as well.

    Raises ValueError if the file object is closed or whence is not
    one of the three os.SEEK_* constants.
    """
    if self.closed:
        raise ValueError("I/O operation on closed file")

    if whence == os.SEEK_SET:
        target = min(max(pos, 0), self.size)
    elif whence == os.SEEK_CUR:
        moved = self.position + pos
        # Moving backwards can only underflow, moving forwards can
        # only overflow, so each direction needs just one bound.
        target = max(moved, 0) if pos < 0 else min(moved, self.size)
    elif whence == os.SEEK_END:
        target = max(min(self.size + pos, self.size), 0)
    else:
        raise ValueError("Invalid argument")

    self.position = target
    self.buffer = ""
    self.fileobj.seek(target)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000891
def close(self):
    """Mark this file object as closed.

    Only the closed flag is set here; the underlying file object is
    not touched by this method.
    """
    self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000896
def __iter__(self):
    """Yield the remaining lines of the file one by one.

    Iteration ends as soon as self.readline() returns a false value.
    """
    current = self.readline()
    while current:
        yield current
        current = self.readline()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000905#class ExFileObject
906
907#------------------
908# Exported Classes
909#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        """Getter of the path property; returns self.name."""
        return self.name
    def _setpath(self, name):
        """Setter of the path property; stores name in self.name."""
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        """Getter of the linkpath property; returns self.linkname."""
        return self.linkname
    def _setlinkpath(self, linkname):
        """Setter of the linkpath property; stores linkname in
           self.linkname.
        """
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        """Return "<ClassName 'member name' at 0xADDRESS>"."""
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.

           name and linkname are normalized with normpath(), mode is
           masked to its lower 12 bits and a directory name gets a
           trailing slash. The four text fields are encoded with
           encoding/errors if they are str instances.
        """
        info = {
            "name":     normpath(self.name),
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": normpath(self.linkname) if self.linkname else "",
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        for key in ("name", "linkname", "uname", "gname"):
            if isinstance(info[key], str):
                info[key] = info[key].encode(encoding, errors)

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
           or PAX_FORMAT); any other value raises ValueError.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname exceeds LENGTH_LINK or the
           name cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT)

    def create_gnu_header(self, info):
        """Return the object as a GNU header block sequence.

           Over-long names and linknames are stored in additional
           GNUTYPE_LONGNAME / GNUTYPE_LONGLINK members that precede the
           actual header.
        """
        info["magic"] = GNU_MAGIC

        buf = ""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)

        return buf + self._create_header(info, GNU_FORMAT)

    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that self.pax_headers is left untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                pax_headers[hname] = val
                continue

            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if no such
           split exists or the remaining name part is still longer
           than LENGTH_NAME.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash; it is not stored in the header.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field: an 8 byte placeholder of spaces. The
            # width matters, frombuf() reads the checksum from bytes
            # 148:156 and the type flag from byte 156.
            " " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", "root"), 32),
            stn(info.get("gname", "root"), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum placeholder; the
        # 8th byte stays a space.
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name += NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be unicode objects.
        """
        records = []
        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length
            # field itself, so iterate until the value is stable.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append("%d %s=%s\n" % (p, keyword, value))
        records = "".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises HeaderError if buf is not exactly BLOCKSIZE long,
           consists only of NUL bytes or carries a bad checksum.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        obj = cls()
        obj.buf = buf
        obj.name = nts(buf[0:100])
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257])
        obj.uname = nts(buf[265:297])
        obj.gname = nts(buf[297:329])
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500])

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.

           Returns None if no more data can be read from
           tarfile.fileobj.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        if not buf:
            return
        obj = cls.frombuf(buf)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises HeaderError if no header follows the long name data.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        next_member = self.fromtarfile(tarfile)
        if next_member is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next_member.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next_member.name = nts(buf)
        elif self.type == GNUTYPE_LONGLINK:
            next_member.linkname = nts(buf)

        return next_member

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

           The resulting list of holes and data sections is stored in
           self.sparse and self.size is replaced by the original
           (expanded) file size.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        lastpos = 0
        realpos = 0
        # There are 4 possible sparse structs in the
        # first header.
        for i in range(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            # NOTE(review): a short read here makes buf[504] below raise
            # IndexError -- confirm how callers treat truncated archives.
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the stored (condensed) size here.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.

           Raises HeaderError if a record announces a length of zero
           or if an extended header is not followed by another header.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would never advance pos and the same
                # record would be matched forever.
                raise HeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        next_member = self.fromtarfile(tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            if next_member is None:
                raise HeaderError("missing subsequent header")

            # Patch the TarInfo object with the extended header info.
            next_member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next_member.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next_member.offset_data
                if next_member.isreg() or next_member.type not in SUPPORTED_TYPES:
                    offset += next_member._block(next_member.size)
                tarfile.offset = offset

        return next_member

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.

           Keywords outside PAX_FIELDS are ignored. Number fields that
           cannot be converted are set to 0. A copy of pax_headers is
           stored in self.pax_headers.
        """
        for keyword, value in pax_headers.items():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member type is CHRTYPE, BLKTYPE or
           FIFOTYPE.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1470# class TarInfo
1471
1472class TarFile(object):
1473 """The TarFile Class provides an interface to tar archives.
1474 """
1475
# Class-level defaults. TarFile.__init__ overrides an attribute on the
# instance only when the matching keyword argument is not None.

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 0              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.
                            # __init__ always replaces this: with the
                            # given handler, else "utf-8" for mode "r"
                            # and "strict" for the other modes.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The default ExFileObject class to use.
1497
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining keyword arguments override the class-level defaults
       of the same name when they are not None. `pax_headers' is only
       used if the format is PAX_FORMAT.

       Raises ValueError if `mode' is not exactly 'r', 'a' or 'w'. If
       reading the first member or writing the global header fails, a
       file that was opened here is closed again before the exception
       is passed on.
    """
    # Compare against the complete mode strings. A substring test would
    # also accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    # A file object without a name attribute leaves name unset.
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding

    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "utf-8"
    else:
        self.errors = "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = 0         # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                if self.next() is None:
                    if self.offset > 0:
                        self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except BaseException:
        # Do not leak a file that was opened above. A file object
        # passed in by the caller stays open.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
1588
def _getposix(self):
    """Getter of the deprecated posix property: true if the archive
       format is USTAR_FORMAT.
    """
    return self.format == USTAR_FORMAT

def _setposix(self, value):
    """Setter of the deprecated posix property.

       Issues a DeprecationWarning, then selects USTAR_FORMAT for a
       true value and GNU_FORMAT otherwise.
    """
    from warnings import warn
    warn("use the format attribute instead", DeprecationWarning)
    self.format = USTAR_FORMAT if value else GNU_FORMAT

posix = property(_getposix, _setposix)
1599
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001600 #--------------------------------------------------------------------------
1601 # Below are the classmethods which act as alternate constructors to the
1602 # TarFile class. The open() method is the only one that is needed for
1603 # public use; it is the "super"-constructor and is able to select an
1604 # adequate "sub"-constructor for a particular compression using the mapping
1605 # from OPEN_METH.
1606 #
1607 # This concept allows one to subclass TarFile without losing the comfort of
1608 # the super-constructor. A sub-constructor is registered and made available
1609 # by adding it to the mapping in OPEN_METH.
1610
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       `bufsize' is only used for the stream ('|') modes. Any additional
       keyword arguments are passed on to the selected *open() method or
       to the class constructor.

       Raises ValueError if neither `name' nor `fileobj' is given or if
       the mode cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered *open() method
       could read the archive.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                # Remember where we started so that a failed attempt
                # can be undone before the next method is tried.
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact membership test: a substring test against "rw" would
        # also accept the two-character string "rw".
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        # The _Stream wrapper was created here, so close() must close it.
        t._extfileobj = False
        return t

    # Exact membership test: `mode in "aw"' is a substring test and
    # would let the empty string and "aw" through.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001683
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. All arguments are handed to the class constructor.
    """
    # Exact membership test: the substring test `mode not in "raw"'
    # accepts the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001691
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       If `fileobj' is None, the file `name' is opened here and is
       closed again if the archive cannot be opened. Raises ValueError
       for a bad mode, CompressionError if the gzip module is unusable
       and ReadError if an IOError occurs while opening the archive.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Remember whether the underlying file is ours, so that it is not
    # leaked when opening fails.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        t = cls.taropen(name, mode,
                        gzip.GzipFile(name, mode, compresslevel, fileobj),
                        **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except Exception:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1717
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       If `fileobj' is given it is wrapped in a _BZ2Proxy, otherwise a
       bz2.BZ2File is opened on `name' and closed again if the archive
       cannot be opened. Raises ValueError for a bad mode,
       CompressionError if the bz2 module is missing and ReadError if an
       IOError occurs while opening the archive.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here is closed on failure; a caller
    # supplied file object stays under the caller's control.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except Exception:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1742
# All *open() methods are registered here.
# Maps a compression type (the part of the mode string after ':') to the
# name of the classmethod that handles it. open() looks methods up by
# key and, for mode 'r'/'r:*', iterates over the keys to try each one.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1749
1750 #--------------------------------------------------------------------------
1751 # The public methods which TarFile provides:
1752
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is only closed if it was not supplied by
       the caller (`_extfileobj' is false); this happens even if writing
       the end-of-archive blocks fails.
    """
    if self.closed:
        return

    # Mark the archive closed first so that a failure below cannot
    # leave it in a half-closed state that gets finalized twice.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
1772
def getmember(self, name):
    """Look up the archive member called `name' and return its TarInfo
       object. KeyError is raised if there is no such member. When a
       name occurs several times in the archive, the last occurrence is
       taken to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001783
def getmembers(self):
    """Return a list of TarInfo objects, one per archive member, in the
       order in which the members appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list is only available after the
    # whole archive has been scanned once.
    self._load()
    return self.members
1793
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001799
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       The archive must be open in mode 'a' or 'w'. None is returned if
       the file is of a type that is not handled here (anything that is
       not a regular file, directory, fifo, symbolic link, character or
       block device). As a side effect, regular files with a non-zero
       inode number are recorded in self.inodes so that later hard links
       to the same file can be stored as LNKTYPE members.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    # The drive part (if any) is discarded.
    drv, arcname = os.path.splitdrive(arcname)
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and statres.st_nlink > 1 and \
                inode in self.inodes and arcname != self.inodes[inode]:
            # Is it a hardlink to an already
            # archived file?
            type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if it is valid.
            # For win32 it is always 0.
            type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        type = BLKTYPE
    else:
        # Unsupported file type; add() reports this to the user.
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data; every other type is stored
    # with a size of zero.
    if stat.S_ISREG(stmd):
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = type
    tarinfo.linkname = linkname
    # Symbolic owner/group names are filled in only when the pwd/grp
    # modules are available and know the numeric id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1898
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. With `verbose' set to
       False only the member names are printed; with True (the default)
       each line resembles the output of `ls -l'.
    """
    self._check()

    for member in self:
        # Collect the columns of one line; they are printed separated
        # by single spaces and followed by a trailing space.
        columns = []
        if verbose:
            columns.append(filemode(member.mode))
            columns.append("%s/%s" % (member.uname or member.uid,
                                      member.gname or member.gid))
            if member.ischr() or member.isblk():
                device = "%d,%d" % (member.devmajor, member.devminor)
                columns.append("%10s" % device)
            else:
                columns.append("%10d" % member.size)
            columns.append("%d-%02d-%02d %02d:%02d:%02d"
                           % time.localtime(member.mtime)[:6])

        columns.append(member.name + ("/" if member.isdir() else ""))

        if verbose:
            if member.issym():
                columns.extend(("->", member.linkname))
            if member.islnk():
                columns.extend(("link to", member.linkname))
        print(*columns, end=" \n")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001927
def add(self, name, arcname=None, recursive=True, exclude=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded.

       The archive must be open in mode 'a' or 'w'. Excluded names, the
       archive file itself and files of unsupported type are skipped
       with a debug message instead of raising an error.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None and exclude(name):
        self._dbg(2, "tarfile: Excluded %r" % name)
        return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f), recursive, exclude)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing to the
            # archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude)

    else:
        self.addfile(tarinfo)
1984
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by the TarInfo object `tarinfo' to
       the archive. When `fileobj' is given, tarinfo.size bytes are read
       from it and stored as the member's data. TarInfo objects can be
       created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object stays independent
    # of the one kept in self.members.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    if fileobj is not None:
        # Copy the payload and pad it with NULs up to a block boundary.
        copyfileobj(fileobj, self.fileobj, member.size)
        nblocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002010
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       NOTE: member names are joined onto `path' without any checking
       here, so archives from untrusted sources should be inspected
       before they are extracted.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), 0o700)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories, so that attributes of nested
    # directories are set before those of their parents.
    directories.sort(key=lambda d: d.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate name: rebinding `path' here would make every
        # following directory resolve against the previous result.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2051
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.

       Error handling depends on self.errorlevel: EnvironmentError is
       re-raised if it is greater than 0 and ExtractError if it is
       greater than 1; otherwise the error is only reported via _dbg().
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        else:
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2084
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()

       Raises StreamError for links when the archive is read from a
       _Stream, and KeyError if a link's target is not in the archive.
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    if tarinfo.isreg():
        return self.fileobject(self, tarinfo)

    elif tarinfo.type not in SUPPORTED_TYPES:
        # If a member's type is unknown, it is treated as a
        # regular file.
        return self.fileobject(self, tarinfo)

    elif tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        else:
            # A (sym)link's file object is its target's file object.
            # Only members stored before the link are considered.
            target = self._getmember(tarinfo.linkname, tarinfo)
            if target is None:
                raise KeyError("linkname %r not found" % tarinfo.linkname)
            return self.extractfile(target)
    else:
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None
2123
def _extract_member(self, tarinfo, targetpath):
    """Write the member described by the TarInfo object `tarinfo' to the
       file system location `targetpath', creating missing parent
       directories, and then apply owner, mode and modification time.
    """
    # Drop one trailing slash and convert the path to the
    # platform's notation.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Make sure the directory that will hold the member exists.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member's type.
    if tarinfo.isreg():
        maker = self.makefile
    elif tarinfo.isdir():
        maker = self.makedir
    elif tarinfo.isfifo():
        maker = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        maker = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        maker = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        maker = self.makeunknown
    else:
        maker = self.makefile
    maker(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2164
2165 #--------------------------------------------------------------------------
2166 # Below are the different file methods. They are called via
2167 # _extract_member() when extract() is called. They can be replaced in a
2168 # subclass to implement other functionality.
2169
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing
       `targetpath' is not treated as an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError as err:
        if err.errno == errno.EEXIST:
            return
        raise
2178
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through extractfile() and copied
       to a newly created file. Both file objects are closed even if
       opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2187
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were
       a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, extracted as regular file."
              % tarinfo.type)
    self._dbg(1, notice)
2195
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. ExtractError is raised when the
       os module does not provide mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002203
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'.
       ExtractError is raised when the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2218
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       The fallback first extracts the archive member the link points
       to into `targetpath'; if that fails, the referenced file is
       copied from the file system. IOError is raised if that fails too.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # The os module lacks symlink()/link() here.
        # NOTE(review): a TarInfo without `_link_target' (it is set by
        # extract()) would presumably end up here as well -- confirm.
        if tarinfo.issym():
            # A symlink target is relative to the directory
            # containing the link itself.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            # First choice: extract the referenced member in place
            # of the link.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError) as e:
            # Last resort: copy the referenced file from the
            # file system.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError as e:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002245
def chown(self, tarinfo, targetpath):
    """Change the owner of `targetpath' to the user and group recorded
       in `tarinfo'. Nothing is done unless the pwd module is available
       and the effective user id is 0.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by numeric id, finally fall
    # back to the group of the current process.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Resolve the user in the same way.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002273
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in `tarinfo' to `targetpath'.
       ExtractError is raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002282
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to the mtime
       stored in `tarinfo'. ExtractError is raised on failure.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002296
2297 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member found is appended to self.members. ReadError is
       raised if the very first header (offset 0) is invalid and
       self.ignore_zeros is not set.
    """
    self._check("ra")
    # A member stored in self.firstmember is handed out exactly once
    # before anything is read from the file.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
            if tarinfo is None:
                # No further member.
                return
            self.members.append(tarinfo)

        except HeaderError as e:
            if self.ignore_zeros:
                # Skip the invalid block and try the following one.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # An invalid header at the very start is an error;
                # anywhere else it is treated as the end of the archive.
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
        break

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002330
2331 #--------------------------------------------------------------------------
2332 # Little helper methods:
2333
def _getmember(self, name, tarinfo=None):
    """Search the member list backwards for a member called `name' and
       return it, or None if there is none. If `tarinfo' is given, only
       the members stored before it are searched.
    """
    # getmembers() makes sure the whole archive has been read.
    members = self.getmembers()

    stop = len(members) if tarinfo is None else members.index(tarinfo)

    for candidate in reversed(members[:stop]):
        if name == candidate.name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002349
def _load(self):
    """Call next() until it reports that no member is left, then mark
       the archive as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2359
def _check(self, mode=None):
    """Raise IOError if the TarFile has been closed or, when `mode' is
       given, if the TarFile's own mode is not contained in `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002368
def __iter__(self):
    """Return an iterator over the archive's members.

    While the archive has not been read completely a TarIter is
    handed out; afterwards a plain iterator over the member list
    is sufficient.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2376
def _dbg(self, level, msg):
    """Print `msg` to sys.stderr unless `level` exceeds the
       TarFile's debug setting.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002382# class TarFile
2383
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator bound to `tarfile`.
        """
        self.tarfile = tarfile
        # Number of members this iterator has handed out so far.
        self.index = 0

    def __iter__(self):
        """The iterator is its own iterator object.
        """
        return self

    def __next__(self):
        """Return the next member, reading it with TarFile's next()
           method. Once next() yields nothing more, the TarFile is
           marked as _loaded.
        """
        archive = self.tarfile

        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Hence a
        # fully loaded archive is served from its member list by
        # position instead of through next().
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return member
2419
2420# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a span of `size` bytes
       beginning at `offset`.
    """

    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # The span is half-open: its start belongs to it, the
        # position directly after its last byte does not.
        start = self.offset
        stop = start + self.size
        return start <= offset < stop
2429
class _data(_section):
    """A section of a sparse file that holds actual data.
    """

    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        # Kept next to offset/size for use by the sparse-file reader.
        self.realpos = realpos
2436
class _hole(_section):
    """A section of a sparse file that is a hole; it adds nothing
       to what _section already provides.
    """
2441
class _ringbuffer(list):
    """List subclass whose find() resumes searching at the position
       of the previous hit and wraps around at the end, which is
       intended to be faster than scanning a regular list from the
       start each time.
    """

    def __init__(self):
        # Position of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item containing `offset`, or None.

        The scan starts at the last hit, visits every item at most
        once (wrapping around at the end of the list) and remembers
        the position of the item it returns. An empty buffer yields
        None instead of raising IndexError.
        """
        count = len(self)
        if not count:
            # Nothing stored at all: treat like "End of File".
            return None

        start = self.idx
        for step in range(count):
            idx = (start + step) % count
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item

        # Went once around without a match: End of File.
        return None
2462
2463#---------------------------------------------
2464# zipfile compatible TarFile class
2465#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED


class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """

    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file` via TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED). Any other `compression`
           value raises ValueError.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names a ZipInfo uses.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return a list with the names of all regular-file members.
        """
        # A real list (not a lazy map object), as ZipFile returns.
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return a list of the members whose type is in
           REGULAR_TYPES.
        """
        # A real list (not a lazy filter object), as ZipFile returns.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to the underlying TarFile's list() method.
        """
        self.tarfile.list()

    def testzip(self):
        """Present for interface compatibility; always returns None.
        """
        return

    def getinfo(self, name):
        """Return the member called `name`.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member `name`.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add `filename` to the archive as `arcname`.
           `compress_type` is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member described by `zinfo` with the payload `bytes`.

        `zinfo` supplies filename, file_size and date_time, which
        are copied to the name, size and mtime attributes before it
        is passed to addfile(). A str payload is encoded as UTF-8.
        """
        from io import BytesIO
        import calendar
        data = bytes
        if isinstance(data, str):
            data = data.encode("utf-8")
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        # The payload is binary, so it needs a bytes buffer;
        # io.StringIO refuses a bytes initial value.
        self.tarfile.addfile(zinfo, BytesIO(data))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2509#class TarFileCompat
2510
2511#--------------------
2512# exported functions
2513#--------------------
def is_tarfile(name):
    """Tell whether `name` can be opened as a tar archive.

    Returns True if opening and closing it works, and False if
    a TarError is raised on the way.
    """
    try:
        open(name).close()
    except TarError:
        return False
    else:
        return True
2524
# Keep the previous binding of open reachable as bltn_open, because the
# next statement rebinds the module-level name.
Guido van Rossume7ba4952007-06-06 23:52:48 +00002525bltn_open = open
# From here on, open at module level is TarFile.open; functions defined
# above that call open() (e.g. is_tarfile) resolve to it at call time.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002526open = TarFile.open