blob: 94dac98ba87adeb1a9753168d5dfbb85536012be [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Guido van Rossumd8faa362007-04-27 19:54:29 +000036version = "0.9.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000052import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000053import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    #
    # The exception is raised with the call form: it is accepted by every
    # Python version, whereas "raise Class, value" is a syntax error on
    # Python 3.
    raise ImportError("tarfile does not work for platform==mac")
61
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000062try:
63 import grp, pwd
64except ImportError:
65 grp = pwd = None
66
# Names exported by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69
Guido van Rossum8f78fe92006-08-24 04:03:53 +000070from __builtin__ import open as _open # Since 'open' is TarFile.open
71
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000072#---------------------------------------------------------
73# tar constants
74#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string: "ustar", NUL, "00"

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of a member's type field.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers of the archive formats; DEFAULT_FORMAT is also the default
# value of itn()'s format argument.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000107
108#---------------------------------------------------------
109# tarfile constants
110#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings. Each key maps to the callable (float or int)
# associated with that field.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
140
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000141#---------------------------------------------------------
142# Bits used in the mode field, values in octal.
143#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000        # symbolic link
S_IFREG = 0o100000        # regular file
S_IFBLK = 0o060000        # block device
S_IFDIR = 0o040000        # directory
S_IFCHR = 0o020000        # character device
S_IFIFO = 0o010000        # fifo

# Special permission bits.
TSUID = 0o4000            # set UID on execution
TSGID = 0o2000            # set GID on execution
TSVTX = 0o1000            # reserved

# Permission bits for owner, group and other.
TUREAD = 0o400            # read by owner
TUWRITE = 0o200           # write by owner
TUEXEC = 0o100            # execute/search by owner
TGREAD = 0o040            # read by group
TGWRITE = 0o020           # write by group
TGEXEC = 0o010            # execute/search by group
TOREAD = 0o004            # read by other
TOWRITE = 0o002           # write by other
TOEXEC = 0o001            # execute/search by other
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000164
165#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000166# initialization
167#---------------------------------------------------------
# ENCODING is the filesystem encoding reported by the interpreter; when
# that is None, fall back to the interpreter's default encoding.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
172#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000173# Some useful functions
174#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000175
def stn(s, length):
    """Return s as a fixed-size field of exactly length characters.

    s is cut off after length characters; a shorter s is filled up
    on the right with null characters.
    """
    truncated = s[:length]
    return truncated.ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000180
def nts(s):
    """Return the part of a string field that precedes its first null
    character, or the whole field if it contains none.
    """
    head, _sep, _tail = s.partition("\0")
    return head
189
def nti(s):
    """Convert a number field to a python number.

    s is the raw header field. Two encodings are understood (see itn()):
    a string of octal digits terminated by a null character, or a
    leading 0o200 byte followed by a big-endian binary number.

    Raises HeaderError if an octal field does not contain a valid number.
    """
    if s[0] != chr(0o200):
        # Octal text. Surrounding whitespace is stripped explicitly so that
        # a field consisting only of spaces decodes to 0 (like an empty
        # field) instead of being rejected as an invalid header.
        try:
            n = int(nts(s).strip() or "0", 8)
        except ValueError:
            raise HeaderError("invalid header")
    else:
        # Binary encoding: every byte after the marker is one base-256
        # digit, most significant first.
        n = 0
        for c in s[1:]:
            n = (n << 8) + ord(c)
    return n
206
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode the python number n as a number field of digits characters.

    Raises ValueError if n does not fit into the field for the given
    format.
    """
    # POSIX 1003.1-1988 stores a number as octal digits followed by a
    # null character, which covers 0 .. (8**(digits-1))-1. GNU tar has a
    # second encoding for everything else: a 0o200 marker byte followed by
    # digits-1 bytes holding the number big-endian, which covers values
    # up to (256**(digits-1))-1.
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Peel off the bytes least significant first, then flip the order.
    payload = []
    for _ in range(width):
        payload.append(chr(n & 0o377))
        n >>= 8
    payload.reverse()
    return chr(0o200) + "".join(payload)
233
def uts(s, encoding, errors):
    """Encode the unicode object s with encoding and return the result.

    errors is passed on to the codec, except for the special value
    "utf-8": in that case every character that encoding cannot
    represent is stored as its UTF-8 representation instead.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        # Fall back to encoding character by character so that only the
        # untranslatable ones are replaced.
        pieces = []
        for char in s:
            try:
                encoded = char.encode(encoding, "strict")
            except UnicodeEncodeError:
                encoded = char.encode("utf8")
            pieces.append(encoded)
        return "".join(pieces)
253
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    Every character of the 512 byte header is summed up, except for
    the 8 characters of the chksum field (offsets 148-155), which
    count as if they were spaces. According to the GNU tar sources,
    some tars (Sun and NeXT) sum up signed chars, which gives a
    different result when characters with the high bit set are
    present, so both variants are calculated.
    """
    header = buf[:512]
    # 8 spaces stand in for the chksum field: 8 * 0x20 == 256.
    chksum_field = 256
    # "8x" skips the chksum field itself.
    unsigned_chksum = chksum_field + sum(struct.unpack("148B8x356B", header))
    signed_chksum = chksum_field + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000266
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, everything up to the end of src is copied.
    Raises IOError if src ends before length bytes were copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes, a short read means src is exhausted.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    whole, rest = divmod(length, BUFSIZE)
    while whole > 0:
        transfer(BUFSIZE)
        whole -= 1
    if rest != 0:
        transfer(rest)
    return
291
# One entry per character of the mode string. Each entry lists
# (bits, character) alternatives in the order they are tried; the first
# alternative whose bits are all set in the mode wins.
filemode_table = (
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)

def filemode(mode):
    """Return the string form (like -rwxrwxrwx) of a file's mode.

    Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        # "-" is used when none of the alternatives applies.
        symbol = "-"
        for mask, flag in alternatives:
            if mode & mask == mask:
                symbol = flag
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000333
# normpath() normalizes a pathname and always uses "/" as the separator.
if os.sep == "/":
    normpath = os.path.normpath
else:
    # Translate the local separator after normalizing.
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
338
class TarError(Exception):
    """Root of the tarfile exception hierarchy."""


class ExtractError(TarError):
    """Raised for general errors during extraction."""


class ReadError(TarError):
    """Raised for tar archives that cannot be read."""


class CompressionError(TarError):
    """Raised when a compression method is not available."""


class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""


class HeaderError(TarError):
    """Raised for invalid headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000357
358#---------------------------
359# internal stream interface
360#---------------------------
361class _LowLevelFile:
362 """Low-level file object. Supports reading and writing.
363 It is used instead of a regular file object for streaming
364 access.
365 """
366
367 def __init__(self, name, mode):
368 mode = {
369 "r": os.O_RDONLY,
370 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
371 }[mode]
372 if hasattr(os, "O_BINARY"):
373 mode |= os.O_BINARY
374 self.fd = os.open(name, mode)
375
376 def close(self):
377 os.close(self.fd)
378
379 def read(self, size):
380 return os.read(self.fd, size)
381
382 def write(self, s):
383 os.write(self.fd, s)
384
385class _Stream:
386 """Class that serves as an adapter between TarFile and
387 a stream-like object. The stream-like object only
388 needs to have a read() or write() method and is accessed
389 blockwise. Use of gzip or bzip2 compression is possible.
390 A stream-like object could be for example: sys.stdin,
391 sys.stdout, a socket, a tape device etc.
392
393 _Stream is intended to be used only internally.
394 """
395
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000396 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000397 """Construct a _Stream object.
398 """
399 self._extfileobj = True
400 if fileobj is None:
401 fileobj = _LowLevelFile(name, mode)
402 self._extfileobj = False
403
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000404 if comptype == '*':
405 # Enable transparent compression detection for the
406 # stream interface
407 fileobj = _StreamProxy(fileobj)
408 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000409
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 self.name = name or ""
411 self.mode = mode
412 self.comptype = comptype
413 self.fileobj = fileobj
414 self.bufsize = bufsize
415 self.buf = ""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000416 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000417 self.closed = False
418
419 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000420 try:
421 import zlib
422 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000423 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000424 self.zlib = zlib
425 self.crc = zlib.crc32("")
426 if mode == "r":
427 self._init_read_gz()
428 else:
429 self._init_write_gz()
430
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000431 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000432 try:
433 import bz2
434 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000435 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000436 if mode == "r":
437 self.dbuf = ""
438 self.cmp = bz2.BZ2Decompressor()
439 else:
440 self.cmp = bz2.BZ2Compressor()
441
442 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000443 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000444 self.close()
445
446 def _init_write_gz(self):
447 """Initialize for writing with gzip compression.
448 """
449 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
450 -self.zlib.MAX_WBITS,
451 self.zlib.DEF_MEM_LEVEL,
452 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000453 timestamp = struct.pack("<L", int(time.time()))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000454 self.__write("\037\213\010\010%s\002\377" % timestamp)
455 if self.name.endswith(".gz"):
456 self.name = self.name[:-3]
457 self.__write(self.name + NUL)
458
459 def write(self, s):
460 """Write string s to the stream.
461 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000462 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000463 self.crc = self.zlib.crc32(s, self.crc)
464 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000465 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000466 s = self.cmp.compress(s)
467 self.__write(s)
468
469 def __write(self, s):
470 """Write string s to the stream if a whole new block
471 is ready to be written.
472 """
473 self.buf += s
474 while len(self.buf) > self.bufsize:
475 self.fileobj.write(self.buf[:self.bufsize])
476 self.buf = self.buf[self.bufsize:]
477
478 def close(self):
479 """Close the _Stream object. No operation should be
480 done on it afterwards.
481 """
482 if self.closed:
483 return
484
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000485 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000486 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000487
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000488 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000489 self.fileobj.write(self.buf)
490 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000491 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000492 # The native zlib crc is an unsigned 32-bit integer, but
493 # the Python wrapper implicitly casts that to a signed C
494 # long. So, on a 32-bit box self.crc may "look negative",
495 # while the same crc on a 64-bit box may "look positive".
496 # To avoid irksome warnings from the `struct` module, force
497 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000498 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
499 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000500
501 if not self._extfileobj:
502 self.fileobj.close()
503
504 self.closed = True
505
506 def _init_read_gz(self):
507 """Initialize for reading a gzip compressed fileobj.
508 """
509 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
510 self.dbuf = ""
511
512 # taken from gzip.GzipFile with some alterations
513 if self.__read(2) != "\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000514 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000515 if self.__read(1) != "\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000516 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000517
518 flag = ord(self.__read(1))
519 self.__read(6)
520
521 if flag & 4:
522 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
523 self.read(xlen)
524 if flag & 8:
525 while True:
526 s = self.__read(1)
527 if not s or s == NUL:
528 break
529 if flag & 16:
530 while True:
531 s = self.__read(1)
532 if not s or s == NUL:
533 break
534 if flag & 2:
535 self.__read(2)
536
537 def tell(self):
538 """Return the stream's file pointer position.
539 """
540 return self.pos
541
542 def seek(self, pos=0):
543 """Set the stream's file pointer to pos. Negative seeking
544 is forbidden.
545 """
546 if pos - self.pos >= 0:
547 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000548 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000549 self.read(self.bufsize)
550 self.read(remainder)
551 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000552 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000553 return self.pos
554
555 def read(self, size=None):
556 """Return the next size number of bytes from the stream.
557 If size is not defined, return all bytes of the stream
558 up to EOF.
559 """
560 if size is None:
561 t = []
562 while True:
563 buf = self._read(self.bufsize)
564 if not buf:
565 break
566 t.append(buf)
567 buf = "".join(t)
568 else:
569 buf = self._read(size)
570 self.pos += len(buf)
571 return buf
572
573 def _read(self, size):
574 """Return size bytes from the stream.
575 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000576 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000577 return self.__read(size)
578
579 c = len(self.dbuf)
580 t = [self.dbuf]
581 while c < size:
582 buf = self.__read(self.bufsize)
583 if not buf:
584 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000585 try:
586 buf = self.cmp.decompress(buf)
587 except IOError:
588 raise ReadError("invalid compressed data")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000589 t.append(buf)
590 c += len(buf)
591 t = "".join(t)
592 self.dbuf = t[size:]
593 return t[:size]
594
595 def __read(self, size):
596 """Return size bytes from stream. If internal buffer is empty,
597 read another block from the stream.
598 """
599 c = len(self.buf)
600 t = [self.buf]
601 while c < size:
602 buf = self.fileobj.read(self.bufsize)
603 if not buf:
604 break
605 t.append(buf)
606 c += len(buf)
607 t = "".join(t)
608 self.buf = t[size:]
609 return t[:size]
610# class _Stream
611
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000612class _StreamProxy(object):
613 """Small proxy class that enables transparent compression
614 detection for the Stream interface (mode 'r|*').
615 """
616
617 def __init__(self, fileobj):
618 self.fileobj = fileobj
619 self.buf = self.fileobj.read(BLOCKSIZE)
620
621 def read(self, size):
622 self.read = self.fileobj.read
623 return self.buf
624
625 def getcomptype(self):
626 if self.buf.startswith("\037\213\010"):
627 return "gz"
628 if self.buf.startswith("BZh91"):
629 return "bz2"
630 return "tar"
631
632 def close(self):
633 self.fileobj.close()
634# class StreamProxy
635
Thomas Wouters477c8d52006-05-27 19:21:47 +0000636class _BZ2Proxy(object):
637 """Small proxy class that enables external file object
638 support for "r:bz2" and "w:bz2" modes. This is actually
639 a workaround for a limitation in bz2 module's BZ2File
640 class which (unlike gzip.GzipFile) has no support for
641 a file object argument.
642 """
643
644 blocksize = 16 * 1024
645
646 def __init__(self, fileobj, mode):
647 self.fileobj = fileobj
648 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000649 self.name = getattr(self.fileobj, "name", None)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000650 self.init()
651
652 def init(self):
653 import bz2
654 self.pos = 0
655 if self.mode == "r":
656 self.bz2obj = bz2.BZ2Decompressor()
657 self.fileobj.seek(0)
658 self.buf = ""
659 else:
660 self.bz2obj = bz2.BZ2Compressor()
661
662 def read(self, size):
663 b = [self.buf]
664 x = len(self.buf)
665 while x < size:
666 try:
667 raw = self.fileobj.read(self.blocksize)
668 data = self.bz2obj.decompress(raw)
669 b.append(data)
670 except EOFError:
671 break
672 x += len(data)
673 self.buf = "".join(b)
674
675 buf = self.buf[:size]
676 self.buf = self.buf[size:]
677 self.pos += len(buf)
678 return buf
679
680 def seek(self, pos):
681 if pos < self.pos:
682 self.init()
683 self.read(pos - self.pos)
684
685 def tell(self):
686 return self.pos
687
688 def write(self, data):
689 self.pos += len(data)
690 raw = self.bz2obj.compress(data)
691 self.fileobj.write(raw)
692
693 def close(self):
694 if self.mode == "w":
695 raw = self.bz2obj.flush()
696 self.fileobj.write(raw)
697 self.fileobj.close()
698# class _BZ2Proxy
699
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000700#------------------------
701# Extraction file object
702#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000703class _FileInFile(object):
704 """A thin wrapper around an existing file object that
705 provides a part of its data as an individual file
706 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707 """
708
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000709 def __init__(self, fileobj, offset, size, sparse=None):
710 self.fileobj = fileobj
711 self.offset = offset
712 self.size = size
713 self.sparse = sparse
714 self.position = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000715
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000716 def tell(self):
717 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000718 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000719 return self.position
720
721 def seek(self, position):
722 """Seek to a position in the file.
723 """
724 self.position = position
725
726 def read(self, size=None):
727 """Read data from the file.
728 """
729 if size is None:
730 size = self.size - self.position
731 else:
732 size = min(size, self.size - self.position)
733
734 if self.sparse is None:
735 return self.readnormal(size)
736 else:
737 return self.readsparse(size)
738
739 def readnormal(self, size):
740 """Read operation for regular files.
741 """
742 self.fileobj.seek(self.offset + self.position)
743 self.position += size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000744 return self.fileobj.read(size)
745
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000746 def readsparse(self, size):
747 """Read operation for sparse files.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000748 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000749 data = []
750 while size > 0:
751 buf = self.readsparsesection(size)
752 if not buf:
753 break
754 size -= len(buf)
755 data.append(buf)
756 return "".join(data)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000757
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000758 def readsparsesection(self, size):
759 """Read a single section of a sparse file.
760 """
761 section = self.sparse.find(self.position)
762
763 if section is None:
764 return ""
765
766 size = min(size, section.offset + section.size - self.position)
767
768 if isinstance(section, _data):
769 realpos = section.realpos + self.position - section.offset
770 self.fileobj.seek(self.offset + realpos)
771 self.position += size
772 return self.fileobj.read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000773 else:
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000774 self.position += size
775 return NUL * size
776#class _FileInFile
777
778
class ExFileObject(object):
    """File-like object for reading an archive member.
    Is returned by TarFile.extractfile().
    """
    # Number of bytes readline() requests at once.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Create the file object for the member tarinfo of the
        archive tarfile.
        """
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data already fetched by readline() but not yet returned.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
        present, None or negative, read all data until EOF is reached.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # read(-1) means "read everything" for file objects. A negative
        # size must not be used for slicing the buffer or be passed on.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
        and non-negative, return a string with at most that
        size, which may be an incomplete line.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line; None and every
        # negative value mean "no limit".
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

        whence is os.SEEK_SET, os.SEEK_CUR or os.SEEK_END. The resulting
        position is limited to the range from 0 to the size of the file.
        Raises ValueError if the file is closed or whence is invalid.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered data belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
906
907#------------------
908# Exported Classes
909#------------------
910class TarInfo(object):
911 """Informational class which holds the details about an
912 archive member given by a tar header block.
913 TarInfo objects are returned by TarFile.getmember(),
914 TarFile.getmembers() and TarFile.gettarinfo() and are
915 usually created internally.
916 """
917
918 def __init__(self, name=""):
919 """Construct a TarInfo object. name is the optional name
920 of the member.
921 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000922 self.name = name # member name
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000923 self.mode = 0o644 # file permissions
Thomas Wouters477c8d52006-05-27 19:21:47 +0000924 self.uid = 0 # user id
925 self.gid = 0 # group id
926 self.size = 0 # file size
927 self.mtime = 0 # modification time
928 self.chksum = 0 # header checksum
929 self.type = REGTYPE # member type
930 self.linkname = "" # link name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000931 self.uname = "root" # user name
932 self.gname = "root" # group name
Thomas Wouters477c8d52006-05-27 19:21:47 +0000933 self.devmajor = 0 # device major number
934 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000935
Thomas Wouters477c8d52006-05-27 19:21:47 +0000936 self.offset = 0 # the tar header starts here
937 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000938
Guido van Rossumd8faa362007-04-27 19:54:29 +0000939 self.pax_headers = {} # pax header information
940
941 # In pax headers the "name" and "linkname" field are called
942 # "path" and "linkpath".
943 def _getpath(self):
944 return self.name
945 def _setpath(self, name):
946 self.name = name
947 path = property(_getpath, _setpath)
948
949 def _getlinkpath(self):
950 return self.linkname
951 def _setlinkpath(self, linkname):
952 self.linkname = linkname
953 linkpath = property(_getlinkpath, _setlinkpath)
954
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000955 def __repr__(self):
956 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
957
Guido van Rossume7ba4952007-06-06 23:52:48 +0000958 def get_info(self, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000959 """Return the TarInfo's attributes as a dictionary.
960 """
961 info = {
962 "name": normpath(self.name),
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000963 "mode": self.mode & 0o7777,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000964 "uid": self.uid,
965 "gid": self.gid,
966 "size": self.size,
967 "mtime": self.mtime,
968 "chksum": self.chksum,
969 "type": self.type,
970 "linkname": normpath(self.linkname) if self.linkname else "",
971 "uname": self.uname,
972 "gname": self.gname,
973 "devmajor": self.devmajor,
974 "devminor": self.devminor
975 }
976
977 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
978 info["name"] += "/"
979
Guido van Rossume7ba4952007-06-06 23:52:48 +0000980 for key in ("name", "linkname", "uname", "gname"):
Walter Dörwald2d5c2192007-06-12 18:07:38 +0000981 if isinstance(info[key], str):
Guido van Rossume7ba4952007-06-06 23:52:48 +0000982 info[key] = info[key].encode(encoding, errors)
983
Guido van Rossumd8faa362007-04-27 19:54:29 +0000984 return info
985
Guido van Rossume7ba4952007-06-06 23:52:48 +0000986 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000987 """Return a tar header as a string of 512 byte blocks.
988 """
Guido van Rossume7ba4952007-06-06 23:52:48 +0000989 info = self.get_info(encoding, errors)
990
Guido van Rossumd8faa362007-04-27 19:54:29 +0000991 if format == USTAR_FORMAT:
Guido van Rossume7ba4952007-06-06 23:52:48 +0000992 return self.create_ustar_header(info)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000993 elif format == GNU_FORMAT:
Guido van Rossume7ba4952007-06-06 23:52:48 +0000994 return self.create_gnu_header(info)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000995 elif format == PAX_FORMAT:
Guido van Rossume7ba4952007-06-06 23:52:48 +0000996 return self.create_pax_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000997 else:
998 raise ValueError("invalid format")
999
Guido van Rossume7ba4952007-06-06 23:52:48 +00001000 def create_ustar_header(self, info):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001001 """Return the object as a ustar header block.
1002 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001003 info["magic"] = POSIX_MAGIC
1004
1005 if len(info["linkname"]) > LENGTH_LINK:
1006 raise ValueError("linkname is too long")
1007
1008 if len(info["name"]) > LENGTH_NAME:
1009 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1010
1011 return self._create_header(info, USTAR_FORMAT)
1012
Guido van Rossume7ba4952007-06-06 23:52:48 +00001013 def create_gnu_header(self, info):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001014 """Return the object as a GNU header block sequence.
1015 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001016 info["magic"] = GNU_MAGIC
1017
1018 buf = ""
1019 if len(info["linkname"]) > LENGTH_LINK:
1020 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1021
1022 if len(info["name"]) > LENGTH_NAME:
1023 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1024
1025 return buf + self._create_header(info, GNU_FORMAT)
1026
Guido van Rossume7ba4952007-06-06 23:52:48 +00001027 def create_pax_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001028 """Return the object as a ustar header block. If it cannot be
1029 represented this way, prepend a pax extended header sequence
1030 with supplement information.
1031 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001032 info["magic"] = POSIX_MAGIC
1033 pax_headers = self.pax_headers.copy()
1034
1035 # Test string fields for values that exceed the field length or cannot
1036 # be represented in ASCII encoding.
1037 for name, hname, length in (
1038 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1039 ("uname", "uname", 32), ("gname", "gname", 32)):
1040
Guido van Rossume7ba4952007-06-06 23:52:48 +00001041 if hname in pax_headers:
1042 # The pax header has priority.
1043 continue
1044
1045 val = info[name].decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001046
1047 # Try to encode the string as ASCII.
1048 try:
1049 val.encode("ascii")
1050 except UnicodeEncodeError:
1051 pax_headers[hname] = val
1052 continue
1053
Guido van Rossume7ba4952007-06-06 23:52:48 +00001054 if len(info[name]) > length:
1055 pax_headers[hname] = val
Guido van Rossumd8faa362007-04-27 19:54:29 +00001056
1057 # Test number fields for values that exceed the field limit or values
1058 # that like to be stored as float.
1059 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Guido van Rossume7ba4952007-06-06 23:52:48 +00001060 if name in pax_headers:
1061 # The pax header has priority. Avoid overflow.
1062 info[name] = 0
1063 continue
1064
Guido van Rossumd8faa362007-04-27 19:54:29 +00001065 val = info[name]
1066 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
Guido van Rossumef87d6e2007-05-02 19:09:54 +00001067 pax_headers[name] = str(val)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001068 info[name] = 0
1069
Guido van Rossume7ba4952007-06-06 23:52:48 +00001070 # Create a pax extended header if necessary.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001071 if pax_headers:
1072 buf = self._create_pax_generic_header(pax_headers)
1073 else:
1074 buf = ""
1075
1076 return buf + self._create_header(info, USTAR_FORMAT)
1077
1078 @classmethod
Guido van Rossume7ba4952007-06-06 23:52:48 +00001079 def create_pax_global_header(cls, pax_headers):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001080 """Return the object as a pax global header block sequence.
1081 """
Guido van Rossume7ba4952007-06-06 23:52:48 +00001082 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001083
1084 def _posix_split_name(self, name):
1085 """Split a name longer than 100 chars into a prefix
1086 and a name part.
1087 """
1088 prefix = name[:LENGTH_PREFIX + 1]
1089 while prefix and prefix[-1] != "/":
1090 prefix = prefix[:-1]
1091
1092 name = name[len(prefix):]
1093 prefix = prefix[:-1]
1094
1095 if not prefix or len(name) > LENGTH_NAME:
1096 raise ValueError("name is too long")
1097 return prefix, name
1098
1099 @staticmethod
1100 def _create_header(info, format):
1101 """Return a header block. info is a dictionary with file
1102 information, format must be one of the *_FORMAT constants.
1103 """
1104 parts = [
1105 stn(info.get("name", ""), 100),
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001106 itn(info.get("mode", 0) & 0o7777, 8, format),
Guido van Rossumd8faa362007-04-27 19:54:29 +00001107 itn(info.get("uid", 0), 8, format),
1108 itn(info.get("gid", 0), 8, format),
1109 itn(info.get("size", 0), 12, format),
1110 itn(info.get("mtime", 0), 12, format),
1111 " ", # checksum field
1112 info.get("type", REGTYPE),
1113 stn(info.get("linkname", ""), 100),
Guido van Rossume7ba4952007-06-06 23:52:48 +00001114 stn(info.get("magic", POSIX_MAGIC), 8),
1115 stn(info.get("uname", "root"), 32),
1116 stn(info.get("gname", "root"), 32),
Guido van Rossumd8faa362007-04-27 19:54:29 +00001117 itn(info.get("devmajor", 0), 8, format),
1118 itn(info.get("devminor", 0), 8, format),
1119 stn(info.get("prefix", ""), 155)
1120 ]
1121
1122 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1123 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1124 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1125 return buf
1126
1127 @staticmethod
1128 def _create_payload(payload):
1129 """Return the string payload filled with zero bytes
1130 up to the next 512 byte border.
1131 """
1132 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1133 if remainder > 0:
1134 payload += (BLOCKSIZE - remainder) * NUL
1135 return payload
1136
1137 @classmethod
1138 def _create_gnu_long_header(cls, name, type):
1139 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1140 for name.
1141 """
1142 name += NUL
1143
1144 info = {}
1145 info["name"] = "././@LongLink"
1146 info["type"] = type
1147 info["size"] = len(name)
1148 info["magic"] = GNU_MAGIC
1149
1150 # create extended header + name blocks.
1151 return cls._create_header(info, USTAR_FORMAT) + \
1152 cls._create_payload(name)
1153
1154 @classmethod
1155 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1156 """Return a POSIX.1-2001 extended or global header sequence
1157 that contains a list of keyword, value pairs. The values
1158 must be unicode objects.
1159 """
1160 records = []
1161 for keyword, value in pax_headers.items():
1162 keyword = keyword.encode("utf8")
1163 value = value.encode("utf8")
1164 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1165 n = p = 0
1166 while True:
1167 n = l + len(str(p))
1168 if n == p:
1169 break
1170 p = n
1171 records.append("%d %s=%s\n" % (p, keyword, value))
1172 records = "".join(records)
1173
1174 # We use a hardcoded "././@PaxHeader" name like star does
1175 # instead of the one that POSIX recommends.
1176 info = {}
1177 info["name"] = "././@PaxHeader"
1178 info["type"] = type
1179 info["size"] = len(records)
1180 info["magic"] = POSIX_MAGIC
1181
1182 # Create pax header + record blocks.
1183 return cls._create_header(info, USTAR_FORMAT) + \
1184 cls._create_payload(records)
1185
Guido van Rossum75b64e62005-01-16 00:16:11 +00001186 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001187 def frombuf(cls, buf):
1188 """Construct a TarInfo object from a 512 byte string buffer.
1189 """
Thomas Wouters477c8d52006-05-27 19:21:47 +00001190 if len(buf) != BLOCKSIZE:
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001191 raise HeaderError("truncated header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001192 if buf.count(NUL) == BLOCKSIZE:
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001193 raise HeaderError("empty header")
1194
1195 chksum = nti(buf[148:156])
1196 if chksum not in calc_chksums(buf):
1197 raise HeaderError("bad checksum")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001198
Guido van Rossumd8faa362007-04-27 19:54:29 +00001199 obj = cls()
1200 obj.buf = buf
1201 obj.name = nts(buf[0:100])
1202 obj.mode = nti(buf[100:108])
1203 obj.uid = nti(buf[108:116])
1204 obj.gid = nti(buf[116:124])
1205 obj.size = nti(buf[124:136])
1206 obj.mtime = nti(buf[136:148])
1207 obj.chksum = chksum
1208 obj.type = buf[156:157]
1209 obj.linkname = nts(buf[157:257])
1210 obj.uname = nts(buf[265:297])
1211 obj.gname = nts(buf[297:329])
1212 obj.devmajor = nti(buf[329:337])
1213 obj.devminor = nti(buf[337:345])
1214 prefix = nts(buf[345:500])
Thomas Wouters89f507f2006-12-13 04:49:30 +00001215
Guido van Rossumd8faa362007-04-27 19:54:29 +00001216 # Old V7 tar format represents a directory as a regular
1217 # file with a trailing slash.
1218 if obj.type == AREGTYPE and obj.name.endswith("/"):
1219 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001220
Guido van Rossumd8faa362007-04-27 19:54:29 +00001221 # Remove redundant slashes from directories.
1222 if obj.isdir():
1223 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001224
Guido van Rossumd8faa362007-04-27 19:54:29 +00001225 # Reconstruct a ustar longname.
1226 if prefix and obj.type not in GNU_TYPES:
1227 obj.name = prefix + "/" + obj.name
1228 return obj
1229
1230 @classmethod
1231 def fromtarfile(cls, tarfile):
1232 """Return the next TarInfo object from TarFile object
1233 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001234 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001235 buf = tarfile.fileobj.read(BLOCKSIZE)
1236 if not buf:
1237 return
1238 obj = cls.frombuf(buf)
1239 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1240 return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001241
Guido van Rossumd8faa362007-04-27 19:54:29 +00001242 #--------------------------------------------------------------------------
1243 # The following are methods that are called depending on the type of a
1244 # member. The entry point is _proc_member() which can be overridden in a
1245 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1246 # implement the following
1247 # operations:
1248 # 1. Set self.offset_data to the position where the data blocks begin,
1249 # if there is data that follows.
1250 # 2. Set tarfile.offset to the position where the next member's header will
1251 # begin.
1252 # 3. Return self or another valid TarInfo object.
1253 def _proc_member(self, tarfile):
1254 """Choose the right processing method depending on
1255 the type and call it.
Thomas Wouters89f507f2006-12-13 04:49:30 +00001256 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001257 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1258 return self._proc_gnulong(tarfile)
1259 elif self.type == GNUTYPE_SPARSE:
1260 return self._proc_sparse(tarfile)
1261 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1262 return self._proc_pax(tarfile)
1263 else:
1264 return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001265
Guido van Rossumd8faa362007-04-27 19:54:29 +00001266 def _proc_builtin(self, tarfile):
1267 """Process a builtin type or an unknown type which
1268 will be treated as a regular file.
1269 """
1270 self.offset_data = tarfile.fileobj.tell()
1271 offset = self.offset_data
1272 if self.isreg() or self.type not in SUPPORTED_TYPES:
1273 # Skip the following data blocks.
1274 offset += self._block(self.size)
1275 tarfile.offset = offset
Thomas Wouters89f507f2006-12-13 04:49:30 +00001276
Guido van Rossume7ba4952007-06-06 23:52:48 +00001277 # Patch the TarInfo object with saved global
Guido van Rossumd8faa362007-04-27 19:54:29 +00001278 # header information.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001279 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001280
1281 return self
1282
1283 def _proc_gnulong(self, tarfile):
1284 """Process the blocks that hold a GNU longname
1285 or longlink member.
1286 """
1287 buf = tarfile.fileobj.read(self._block(self.size))
1288
1289 # Fetch the next header and process it.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001290 next = self.fromtarfile(tarfile)
1291 if next is None:
1292 raise HeaderError("missing subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001293
1294 # Patch the TarInfo object from the next header with
1295 # the longname information.
1296 next.offset = self.offset
1297 if self.type == GNUTYPE_LONGNAME:
Guido van Rossume7ba4952007-06-06 23:52:48 +00001298 next.name = nts(buf)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001299 elif self.type == GNUTYPE_LONGLINK:
Guido van Rossume7ba4952007-06-06 23:52:48 +00001300 next.linkname = nts(buf)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001301
1302 return next
1303
1304 def _proc_sparse(self, tarfile):
1305 """Process a GNU sparse header plus extra headers.
1306 """
1307 buf = self.buf
1308 sp = _ringbuffer()
1309 pos = 386
1310 lastpos = 0
1311 realpos = 0
1312 # There are 4 possible sparse structs in the
1313 # first header.
Guido van Rossum805365e2007-05-07 22:24:25 +00001314 for i in range(4):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001315 try:
1316 offset = nti(buf[pos:pos + 12])
1317 numbytes = nti(buf[pos + 12:pos + 24])
1318 except ValueError:
1319 break
1320 if offset > lastpos:
1321 sp.append(_hole(lastpos, offset - lastpos))
1322 sp.append(_data(offset, numbytes, realpos))
1323 realpos += numbytes
1324 lastpos = offset + numbytes
1325 pos += 24
1326
1327 isextended = ord(buf[482])
1328 origsize = nti(buf[483:495])
1329
1330 # If the isextended flag is given,
1331 # there are extra headers to process.
1332 while isextended == 1:
1333 buf = tarfile.fileobj.read(BLOCKSIZE)
1334 pos = 0
Guido van Rossum805365e2007-05-07 22:24:25 +00001335 for i in range(21):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001336 try:
1337 offset = nti(buf[pos:pos + 12])
1338 numbytes = nti(buf[pos + 12:pos + 24])
1339 except ValueError:
1340 break
1341 if offset > lastpos:
1342 sp.append(_hole(lastpos, offset - lastpos))
1343 sp.append(_data(offset, numbytes, realpos))
1344 realpos += numbytes
1345 lastpos = offset + numbytes
1346 pos += 24
1347 isextended = ord(buf[504])
1348
1349 if lastpos < origsize:
1350 sp.append(_hole(lastpos, origsize - lastpos))
1351
1352 self.sparse = sp
1353
1354 self.offset_data = tarfile.fileobj.tell()
1355 tarfile.offset = self.offset_data + self._block(self.size)
1356 self.size = origsize
1357
1358 return self
1359
1360 def _proc_pax(self, tarfile):
1361 """Process an extended or global header as described in
1362 POSIX.1-2001.
1363 """
1364 # Read the header information.
1365 buf = tarfile.fileobj.read(self._block(self.size))
1366
1367 # A pax header stores supplemental information for either
1368 # the following file (extended) or all following files
1369 # (global).
1370 if self.type == XGLTYPE:
1371 pax_headers = tarfile.pax_headers
1372 else:
1373 pax_headers = tarfile.pax_headers.copy()
1374
Guido van Rossumd8faa362007-04-27 19:54:29 +00001375 # Parse pax header information. A record looks like that:
1376 # "%d %s=%s\n" % (length, keyword, value). length is the size
1377 # of the complete record including the length field itself and
Guido van Rossume7ba4952007-06-06 23:52:48 +00001378 # the newline. keyword and value are both UTF-8 encoded strings.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001379 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1380 pos = 0
1381 while True:
1382 match = regex.match(buf, pos)
1383 if not match:
1384 break
1385
1386 length, keyword = match.groups()
1387 length = int(length)
1388 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1389
1390 keyword = keyword.decode("utf8")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001391 value = value.decode("utf8")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001392
1393 pax_headers[keyword] = value
1394 pos += length
1395
Guido van Rossume7ba4952007-06-06 23:52:48 +00001396 # Fetch the next header.
1397 next = self.fromtarfile(tarfile)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001398
Guido van Rossume7ba4952007-06-06 23:52:48 +00001399 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1400 if next is None:
1401 raise HeaderError("missing subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001402
Guido van Rossume7ba4952007-06-06 23:52:48 +00001403 # Patch the TarInfo object with the extended header info.
1404 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1405 next.offset = self.offset
1406
1407 if "size" in pax_headers:
1408 # If the extended header replaces the size field,
1409 # we need to recalculate the offset where the next
1410 # header starts.
1411 offset = next.offset_data
1412 if next.isreg() or next.type not in SUPPORTED_TYPES:
1413 offset += next._block(next.size)
1414 tarfile.offset = offset
1415
1416 return next
1417
1418 def _apply_pax_info(self, pax_headers, encoding, errors):
1419 """Replace fields with supplemental information from a previous
1420 pax extended or global header.
1421 """
1422 for keyword, value in pax_headers.items():
1423 if keyword not in PAX_FIELDS:
1424 continue
1425
1426 if keyword == "path":
1427 value = value.rstrip("/")
1428
1429 if keyword in PAX_NUMBER_FIELDS:
1430 try:
1431 value = PAX_NUMBER_FIELDS[keyword](value)
1432 except ValueError:
1433 value = 0
1434 else:
1435 value = uts(value, encoding, errors)
1436
1437 setattr(self, keyword, value)
1438
1439 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001440
1441 def _block(self, count):
1442 """Round up a byte count by BLOCKSIZE and return it,
1443 e.g. _block(834) => 1024.
1444 """
1445 blocks, remainder = divmod(count, BLOCKSIZE)
1446 if remainder:
1447 blocks += 1
1448 return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001449
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001450 def isreg(self):
1451 return self.type in REGULAR_TYPES
1452 def isfile(self):
1453 return self.isreg()
1454 def isdir(self):
1455 return self.type == DIRTYPE
1456 def issym(self):
1457 return self.type == SYMTYPE
1458 def islnk(self):
1459 return self.type == LNKTYPE
1460 def ischr(self):
1461 return self.type == CHRTYPE
1462 def isblk(self):
1463 return self.type == BLKTYPE
1464 def isfifo(self):
1465 return self.type == FIFOTYPE
1466 def issparse(self):
1467 return self.type == GNUTYPE_SPARSE
1468 def isdev(self):
1469 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1470# class TarInfo
1471
1472class TarFile(object):
1473 """The TarFile Class provides an interface to tar archives.
1474 """
1475
1476 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1477
1478 dereference = False # If true, add content of linked file to the
1479 # tar file, else the link.
1480
1481 ignore_zeros = False # If true, skips empty or invalid blocks and
1482 # continues processing.
1483
1484 errorlevel = 0 # If 0, fatal errors only appear in debug
1485 # messages (if debug >= 0). If > 0, errors
1486 # are passed to the caller as exceptions.
1487
Guido van Rossumd8faa362007-04-27 19:54:29 +00001488 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001489
Guido van Rossume7ba4952007-06-06 23:52:48 +00001490 encoding = ENCODING # Encoding for 8-bit character strings.
1491
1492 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493
Guido van Rossumd8faa362007-04-27 19:54:29 +00001494 tarinfo = TarInfo # The default TarInfo class to use.
1495
1496 fileobject = ExFileObject # The default ExFileObject class to use.
1497
1498 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1499 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Guido van Rossume7ba4952007-06-06 23:52:48 +00001500 errors=None, pax_headers=None, debug=None, errorlevel=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001501 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1502 read from an existing archive, 'a' to append data to an existing
1503 file or 'w' to create a new file overwriting an existing one. `mode'
1504 defaults to 'r'.
1505 If `fileobj' is given, it is used for reading or writing data. If it
1506 can be determined, `mode' is overridden by `fileobj's mode.
1507 `fileobj' is not closed, when TarFile is closed.
1508 """
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001509 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +00001510 raise ValueError("mode must be 'r', 'a' or 'w'")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001511 self.mode = mode
1512 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001513
1514 if not fileobj:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001515 if self.mode == "a" and not os.path.exists(name):
Thomas Wouterscf297e42007-02-23 15:07:44 +00001516 # Create nonexistent files in append mode.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001517 self.mode = "w"
1518 self._mode = "wb"
Guido van Rossume7ba4952007-06-06 23:52:48 +00001519 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001520 self._extfileobj = False
1521 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001522 if name is None and hasattr(fileobj, "name"):
1523 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001524 if hasattr(fileobj, "mode"):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001525 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001526 self._extfileobj = True
Guido van Rossumd8faa362007-04-27 19:54:29 +00001527 self.name = os.path.abspath(name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001528 self.fileobj = fileobj
1529
Guido van Rossumd8faa362007-04-27 19:54:29 +00001530 # Init attributes.
1531 if format is not None:
1532 self.format = format
1533 if tarinfo is not None:
1534 self.tarinfo = tarinfo
1535 if dereference is not None:
1536 self.dereference = dereference
1537 if ignore_zeros is not None:
1538 self.ignore_zeros = ignore_zeros
1539 if encoding is not None:
1540 self.encoding = encoding
Guido van Rossume7ba4952007-06-06 23:52:48 +00001541
1542 if errors is not None:
1543 self.errors = errors
1544 elif mode == "r":
1545 self.errors = "utf-8"
1546 else:
1547 self.errors = "strict"
1548
1549 if pax_headers is not None and self.format == PAX_FORMAT:
1550 self.pax_headers = pax_headers
1551 else:
1552 self.pax_headers = {}
1553
Guido van Rossumd8faa362007-04-27 19:54:29 +00001554 if debug is not None:
1555 self.debug = debug
1556 if errorlevel is not None:
1557 self.errorlevel = errorlevel
1558
1559 # Init datastructures.
Thomas Wouters477c8d52006-05-27 19:21:47 +00001560 self.closed = False
1561 self.members = [] # list of members as TarInfo objects
1562 self._loaded = False # flag if all members have been read
Guido van Rossume2a383d2007-01-15 16:59:06 +00001563 self.offset = 0 # current position in the archive file
Thomas Wouters477c8d52006-05-27 19:21:47 +00001564 self.inodes = {} # dictionary caching the inodes of
1565 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001566
Guido van Rossumd8faa362007-04-27 19:54:29 +00001567 if self.mode == "r":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001568 self.firstmember = None
1569 self.firstmember = self.next()
1570
Guido van Rossumd8faa362007-04-27 19:54:29 +00001571 if self.mode == "a":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001572 # Move to the end of the archive,
1573 # before the first empty block.
1574 self.firstmember = None
1575 while True:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001576 if self.next() is None:
Thomas Wouterscf297e42007-02-23 15:07:44 +00001577 if self.offset > 0:
1578 self.fileobj.seek(- BLOCKSIZE, 1)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001579 break
1580
Guido van Rossumd8faa362007-04-27 19:54:29 +00001581 if self.mode in "aw":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001582 self._loaded = True
1583
Guido van Rossume7ba4952007-06-06 23:52:48 +00001584 if self.pax_headers:
1585 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
Guido van Rossumd8faa362007-04-27 19:54:29 +00001586 self.fileobj.write(buf)
1587 self.offset += len(buf)
1588
1589 def _getposix(self):
1590 return self.format == USTAR_FORMAT
1591 def _setposix(self, value):
1592 import warnings
1593 warnings.warn("use the format attribute instead", DeprecationWarning)
1594 if value:
1595 self.format = USTAR_FORMAT
1596 else:
1597 self.format = GNU_FORMAT
1598 posix = property(_getposix, _setposix)
1599
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001600 #--------------------------------------------------------------------------
1601 # Below are the classmethods which act as alternate constructors to the
1602 # TarFile class. The open() method is the only one that is needed for
1603 # public use; it is the "super"-constructor and is able to select an
1604 # adequate "sub"-constructor for a particular compression using the mapping
1605 # from OPEN_METH.
1606 #
1607 # This concept allows one to subclass TarFile without losing the comfort of
1608 # the super-constructor. A sub-constructor is registered and made available
1609 # by adding it to the mapping in OPEN_METH.
1610
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
       an instance created by the method registered for the requested
       compression.

       `name' is the archive's pathname and `fileobj' an already open
       file object; at least one of them must be given. `bufsize' is
       only used by the stream ('|') modes. All other keyword arguments
       are handed on unchanged to the selected constructor.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for a compression
       type that is not registered in OPEN_METH and ReadError if none of
       the registered methods can open the archive in 'r'/'r:*' mode.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                # Remember where we started so that a failed attempt
                # can be undone before the next method is tried.
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Tuple membership on purpose: a substring test against "rw"
        # would also let the bogus mode "rw" through.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        t._extfileobj = False
        return t

    # Tuple membership on purpose: a substring test against "aw" would
    # also accept "" and "aw".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001683
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive `name' for reading, appending or
       writing and return the new cls instance.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. `fileobj' and any additional keyword arguments are passed
       to the constructor unchanged.
    """
    # Compare against a tuple: a substring test against "raw" would
    # silently accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001691
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       If `fileobj' is None the file `name' is opened here in binary
       mode, otherwise the given file object is wrapped. Raises
       ValueError for a mode other than 'r' or 'w', CompressionError if
       the gzip module cannot be used and ReadError if opening the
       archive fails with an IOError.
    """
    # Tuple membership: a substring test against "rw" would accept "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Only a file that was opened here may be closed here; a file object
    # supplied by the caller stays under the caller's control.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bltn_open(name, mode + "b")

    try:
        try:
            t = cls.taropen(name, mode,
                            gzip.GzipFile(name, mode, compresslevel, fileobj),
                            **kwargs)
        except IOError:
            raise ReadError("not a gzip file")
    except BaseException:
        # Do not leak the raw file when the archive could not be opened.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1717
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       A given `fileobj' is wrapped in a _BZ2Proxy, otherwise a
       bz2.BZ2File is opened for `name'. Raises ValueError for a mode
       other than 'r' or 'w', CompressionError if the bz2 module cannot
       be imported and ReadError if opening the archive fails with an
       IOError.
    """
    # Tuple membership: a substring test against "rw" would accept "".
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Only a BZ2File created here may be closed here on failure.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except IOError:
            raise ReadError("not a bzip2 file")
    except BaseException:
        # Do not leak the compressed file when opening the archive failed.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1742
# Registry of all *open() methods: maps a compression type to the name
# of the classmethod that opens archives of that type.
OPEN_METH = dict((
    ("tar", "taropen"),     # uncompressed tar
    ("gz", "gzopen"),       # gzip compressed tar
    ("bz2", "bz2open"),     # bzip2 compressed tar
))
1749
1750 #--------------------------------------------------------------------------
1751 # The public methods which TarFile provides:
1752
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up
       to a multiple of RECORDSIZE.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed unless it is an external one
       (self._extfileobj), even if writing the trailer fails.
    """
    if self.closed:
        return

    # Flag the archive as closed up front so that a failed close() is
    # not retried and cannot write a second trailer.
    self.closed = True
    try:
        if self.mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release our own file object no matter whether the trailer
        # could be written.
        if not self._extfileobj:
            self.fileobj.close()
1772
def getmember(self, name):
    """Look up the archive member called `name' and return its TarInfo
       object. The lookup is delegated to _getmember(); if that yields
       nothing, KeyError is raised.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001783
def getmembers(self):
    """Return self.members, the list of the archive's TarInfo objects.
       If the archive has not been read completely yet, _load() is
       called first to scan it.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete listing requires a scan of the whole archive.
    self._load()
    return self.members
1793
def getnames(self):
    """Return the names of all archive members as a list, in the order
       in which getmembers() returns the members.
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001799
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing the file `name' or the open
       file object `fileobj' (examined with os.fstat on its descriptor).
       `arcname', if given, is used as the member's name instead of the
       file's own name. The result can be adjusted before it is handed
       to addfile(). None is returned for file types that cannot be
       represented.
    """
    self._check("aw")

    # A file object's own name takes precedence over `name'.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalize it, drop a drive prefix and
    # strip leading slashes so that the stored path is relative.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    while arcname[:1] == "/":
        arcname = arcname[1:]

    # Create the TarInfo object that is going to be filled in.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Stat the file: fstat for file objects, lstat where available
    # unless symlinks are to be followed, plain stat otherwise.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # A hardlink to a file that is in the archive already.
            kind = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # Remember the inode only if it is valid.
            # For win32 it is always 0.
            kind = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        kind = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        kind = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        kind = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        kind = CHRTYPE
    elif stat.S_ISBLK(stmd):
        kind = BLKTYPE
    else:
        return None

    # Copy everything the stat result tells us into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size if stat.S_ISREG(stmd) else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = linkname

    # Symbolic owner names are optional; ids without an entry are
    # simply left without a name.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    is_device = kind in (CHRTYPE, BLKTYPE)
    if is_device and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1898
def list(self, verbose=True):
    """Print a table of contents to sys.stdout, one member per line.
       With `verbose' set to False only the member names are printed,
       otherwise each line resembles the output of `ls -l'.
    """
    self._check()

    for member in self:
        if verbose:
            print(filemode(member.mode), end=' ')
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            print(owner, end=' ')
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            stamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % stamp, end=' ')

        # Directories are marked with a trailing slash.
        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001927
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided
       by setting `recursive' to False.

       The archive itself and files of an unsupported type are skipped
       with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir(name):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if adding it failed.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1978
def addfile(self, tarinfo, fileobj=None):
    """Append a copy of the TarInfo object `tarinfo' to the archive.
       When `fileobj' is given, tarinfo.size bytes are copied from it
       into the archive after the header and padded with NULs to a full
       block. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so that the caller's object is not stored.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Member data, if any, follows the header in whole blocks.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full_blocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002004
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while fixing up a directory is re-raised
       if self.errorlevel is greater than 1 and reported through _dbg()
       otherwise.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), 0o700)
            except EnvironmentError:
                # Deliberately best effort, e.g. the directory may
                # exist already.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories by name so that a directory is handled
    # after everything below it.
    directories.sort(key=lambda entry: entry.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate name here: rebinding `path' would make every
        # following directory resolve relative to the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2045
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.

       An EnvironmentError is re-raised if self.errorlevel is greater
       than 0, an ExtractError if it is greater than 1; otherwise the
       error is only reported through _dbg().
    """
    self._check("r")

    # Member names are text strings; anything else is taken to be a
    # TarInfo object.
    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        else:
            if e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        else:
            self._dbg(1, "tarfile: %s" % e)
2078
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()

       Raises StreamError when a link is requested from a stream and
       KeyError when a link's target is not part of the archive.
    """
    self._check("r")

    # Member names are text strings; anything else is taken to be a
    # TarInfo object.
    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    if tarinfo.isreg():
        return self.fileobject(self, tarinfo)

    elif tarinfo.type not in SUPPORTED_TYPES:
        # If a member's type is unknown, it is treated as a
        # regular file.
        return self.fileobject(self, tarinfo)

    elif tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        else:
            # A (sym)link's file object is its target's file object.
            target = self._getmember(tarinfo.linkname, tarinfo)
            if target is None:
                # Fail with a lookup error instead of tripping over
                # None in the recursive call.
                raise KeyError("linkname %r not found" % tarinfo.linkname)
            return self.extractfile(target)
    else:
        # If there's no data associated with the member (directory, chrdev,
        # blkdev, etc.), return None instead of a file object.
        return None
2117
def _extract_member(self, tarinfo, targetpath):
    """Create the file system object described by the TarInfo object
       `tarinfo' at `targetpath' and apply owner, permissions and
       modification time to it.
    """
    # Drop a trailing slash and let os.path.normpath() turn the path
    # into its platform specific form.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Make sure the directory that will hold the member exists.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member's type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2158
2159 #--------------------------------------------------------------------------
2160 # Below are the different file methods. They are called via
2161 # _extract_member() when extract() is called. They can be replaced in a
2162 # subclass to implement other functionality.
2163
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that exists
       already is not treated as an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError as e:
        if e.errno == errno.EEXIST:
            return
        raise
2172
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the member's data
       as provided by extractfile(). Both the source and the target are
       closed again, even if opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2181
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were
       a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, note % tarinfo.type)
2189
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. ExtractError is raised if the os
       module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002197
def makedev(self, tarinfo, targetpath):
    """Create the character or block device described by `tarinfo' at
       `targetpath'. ExtractError is raised if the os module lacks
       mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's permission bits with the device type flag.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2212
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link described by `tarinfo' at
       `targetpath'. If an AttributeError shows that the link cannot be
       made, the referenced member is extracted in its place or, failing
       that, the referenced file is copied. IOError is raised if the
       copy fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if not tarinfo.issym():
            # Hard link: the target was prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(linkpath, targetpath)
    except AttributeError:
        # A symlink's target is relative to the directory of the link.
        if tarinfo.issym():
            base = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(base, linkpath))

        try:
            # First choice: extract the referenced archive member.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # Last resort: copy the referenced file from disk.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002239
def chown(self, tarinfo, targetpath):
    """Give `targetpath' the owner and group recorded in `tarinfo'.
       Nothing is done unless the process runs with effective uid 0.
       ExtractError is raised if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by id, then our own group.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # Resolve the user the same way.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002267
def chmod(self, tarinfo, targetpath):
    """Apply the mode stored in `tarinfo' to `targetpath'. Does nothing
       if the os module has no chmod(); raises ExtractError if the call
       fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002276
def utime(self, tarinfo, targetpath):
    """Set both the access and the modification time of `targetpath'
       to tarinfo.mtime. Does nothing if the os module has no utime()
       or for directories on win32; raises ExtractError if the call
       fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    skip_directory = sys.platform == "win32" and tarinfo.isdir()
    if skip_directory:
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002290
2291 #--------------------------------------------------------------------------
def next(self):
    """Return the archive's next member as a TarInfo object, or None
       if there are no more members. Requires a TarFile in mode 'r'
       or 'a'.
    """
    self._check("ra")

    # A member that was read ahead is handed out first.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except HeaderError as e:
            if not self.ignore_zeros:
                # A bad header at the very beginning means that this
                # is not a readable archive at all.
                if self.offset == 0:
                    raise ReadError(str(e))
                return None
            # Skip the bad block and try again.
            self._dbg(2, "0x%X: %s" % (self.offset, e))
            self.offset += BLOCKSIZE
            continue

        if tarinfo is None:
            return
        self.members.append(tarinfo)
        return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002324
2325 #--------------------------------------------------------------------------
2326 # Little helper methods:
2327
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002328 def _getmember(self, name, tarinfo=None):
2329 """Find an archive member by name from bottom to top.
2330 If tarinfo is given, it is used as the starting point.
2331 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002332 # Ensure that all members have been loaded.
2333 members = self.getmembers()
2334
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002335 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002336 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002337 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002338 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002339
Guido van Rossum805365e2007-05-07 22:24:25 +00002340 for i in range(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002341 if name == members[i].name:
2342 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002343
def _load(self):
    """Call next() until it returns None, then mark the TarFile as
       completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2353
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when `mode' is given,
       if the TarFile's own mode is not contained in `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002362
def __iter__(self):
    """Return an iterator over the members: a plain list iterator once
       the archive is completely loaded, a TarIter object otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2370
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    if self.debug >= level:
        print(msg, file=sys.stderr)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002376# class TarFile
2377
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Bind the iterator to tarfile and start counting at 0.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """The iterator is its own iterable.
        """
        return self

    def __next__(self):
        """Hand out the next member. While the TarFile is not fully
           loaded the member comes from its next() method; when that
           is exhausted the TarFile is flagged as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: getmembers() may be called while the
        # iteration is in progress, which loads the whole archive.
        # From then on members are served from the cached list by
        # index, so the iteration does not stop prematurely.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2413
2414# Helper classes for sparse file support
2415class _section:
2416 """Base class for _data and _hole.
2417 """
2418 def __init__(self, offset, size):
2419 self.offset = offset
2420 self.size = size
2421 def __contains__(self, offset):
2422 return self.offset <= offset < self.offset + self.size
2423
class _data(_section):
    """A section of a sparse file that holds data.
    """
    def __init__(self, offset, size, realpos):
        # NOTE(review): realpos is presumably where this section's
        # data is actually stored -- confirm against the sparse
        # file reader.
        self.realpos = realpos
        _section.__init__(self, offset, size)
2430
class _hole(_section):
    """A section of a sparse file that is a hole. Adds nothing to
       _section; the distinct type is what tells it apart from _data.
    """
2435
2436class _ringbuffer(list):
2437 """Ringbuffer class which increases performance
2438 over a regular list.
2439 """
2440 def __init__(self):
2441 self.idx = 0
2442 def find(self, offset):
2443 idx = self.idx
2444 while True:
2445 item = self[idx]
2446 if offset in item:
2447 break
2448 idx += 1
2449 if idx == len(self):
2450 idx = 0
2451 if idx == self.idx:
2452 # End of File
2453 return None
2454 self.idx = idx
2455 return item
2456
2457#---------------------------------------------
2458# zipfile compatible TarFile class
2459#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file through TarFile.taropen (TAR_PLAIN) or
           TarFile.gzopen (TAR_GZIPPED). Any other compression
           value raises ValueError.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names that ZipInfo
            # users expect.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return a list with the names of the members reported by
           infolist().
        """
        # A real list, not a one-shot map object.
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return a list of the members whose type is one of
           REGULAR_TYPES.
        """
        # A real list, not a one-shot filter object.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Delegate to the underlying TarFile's list() method.
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the content of the member called name.
        """
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive under arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the data in bytes to the archive, described by zinfo.
           zinfo's filename, file_size and date_time attributes are
           copied to name, size and mtime before it is handed to
           the underlying TarFile.
        """
        # The payload is binary, so it needs a bytes buffer;
        # io.StringIO rejects bytes with a TypeError.
        from io import BytesIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, BytesIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2503#class TarFileCompat
2504
2505#--------------------
2506# exported functions
2507#--------------------
def is_tarfile(name):
    """Report whether name can be opened as a tar archive:
       True if opening and closing it succeeds, False if a
       TarError is raised while doing so.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2518
# Save whatever open currently refers to (presumably the builtin)
# under another name before the module-level name is rebound below.
bltn_open = open
# From here on, the module-level name open is TarFile.open.
open = TarFile.open