#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

# Module metadata.
# NOTE(review): the "$...$" values look like unexpanded version-control
# keyword placeholders -- confirm before relying on their contents.
__version__ = "$Revision$"
# $Source$

version     = "0.8.0"       # module version string
__author__  = "Lars Gustäbel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
#---------
# Imports
#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
# Refuse to load on classic MacOS: the pathname handling below is not
# correct there, so failing at import time is safer than misbehaving later.
if sys.platform == "mac":
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
Guido van Rossum8f78fe92006-08-24 04:03:53 +000068from __builtin__ import open as _open # Since 'open' is TarFile.open
69
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
# Basic framing values of the archive format.
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20         # length of records
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

# Length limits of header fields and of a single member.
LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L      # maximum size of a file (11 octal digits)

# One-character member types; TarInfo stores one of these in its `type'
# attribute and the is*() predicates compare against them.
REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

# Member types introduced by GNU tar.
GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file
97
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# Groupings of the member type characters defined above.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,          # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,          # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# TarInfo.isreg() tests membership in this tuple.
REGULAR_TYPES = (REGTYPE, AREGTYPE,                     # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)              # represent regular files
109
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File type bits; filemode() maps them to the first character of its result.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Ordinary read/write/execute permission bits for owner, group and other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
133
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    `s' is cut to at most `length - 1' characters and then filled up with
    NULs, so that (for length >= 1) the result is `length' characters long
    and always ends in a NUL.
    """
    text = s[:length - 1]
    # A negative repeat count yields "", so over-long input gets no padding.
    filler = NUL * (length - len(s) - 1)
    return text + filler + NUL
142
def nti(s):
    """Convert a number field to a python number.

    Two encodings are understood (see itn() below):

    * a string of octal digits, terminated and/or padded with NULs or
      spaces.  A field that holds nothing but padding decodes to 0.
    * a leading 0200 byte followed by a big-endian base-256 number.

    Raises ValueError if an octal field contains anything else.
    """
    # s[:1] instead of s[0] so that an empty field decodes to 0 rather
    # than raising IndexError.
    if s[:1] == chr(0x80):          # 0200 octal marks the base-256 form
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
        return n

    # Strip trailing spaces as well as NULs: fields that are blank-filled
    # would otherwise reach int() as whitespace only and raise ValueError,
    # which makes an otherwise valid header look corrupt.
    return int(s.rstrip(NUL + " ") or "0", 8)
156
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field.

    n      -- the number to encode
    digits -- width of the field in characters
    posix  -- if true, refuse values that need the GNU encoding

    Raises ValueError if `posix' is true and `n' does not fit the octal
    form.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the low-order bytes first, then flip them to big-endian.
    octets = []
    for _ in range(digits - 1):
        octets.append(chr(n & 0xFF))
        n >>= 8
    octets.reverse()
    return chr(0x80) + "".join(octets)
183
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All characters of the 512 byte header are summed up, except for the
    chksum field (bytes 148-155) which is treated as if it was filled
    with spaces. According to the GNU tar sources, some tars (Sun and
    NeXT) calculate chksum with signed char, which will be different if
    there are chars in the buffer with the high bit set. So both variants
    are calculated.
    """
    before = buf[:148]
    after = buf[156:512]

    def total(code):
        # 256 is the contribution of the eight blanks (8 * 0x20) that
        # stand in for the chksum field.
        return 256 + sum(struct.unpack("148" + code, before) +
                         struct.unpack("356" + code, after))

    unsigned_chksum = total("B")
    signed_chksum = total("b")
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000196
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, copy the entire content. Raises IOError if src
    runs dry before `length' bytes have been transferred.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(nbytes):
        # Move exactly nbytes; a short read means src ended too early.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_chunks, tail = divmod(length, BUFSIZE)
    done = 0
    while done < full_chunks:
        transfer(BUFSIZE)
        done += 1

    if tail != 0:
        transfer(tail)
    return
221
# Lookup table for filemode(). There is one inner tuple per character of
# the resulting string: the file type first, then read, write and execute
# for owner, group and other. filemode() takes the first (bits, char) pair
# of a group whose bits are all set, so combined entries (e.g. execute plus
# set-UID) have to come before the single bits they are made of.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000248
def filemode(mode):
    """Convert a file's mode to a string of the form
    -rwxrwxrwx.
    Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" stays in place when none of the group's entries applies.
        symbol = "-"
        for mask, letter in candidates:
            if (mode & mask) == mask:
                symbol = letter
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000263
# normpath(path) normalizes a path and always answers with "/" as the
# separator, whatever the local os.sep is.
if os.sep == "/":
    normpath = os.path.normpath
else:
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
268
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
284
#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
    It is used instead of a regular file object for streaming
    access.

    It is a thin wrapper around an os-level file descriptor (self.fd).
    """

    def __init__(self, name, mode):
        """Open the file `name'.

        mode is "r" (read only) or "w" (create or truncate, write only);
        any other value raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is only defined on platforms that distinguish between
        # text and binary files.
        flags |= getattr(os, "O_BINARY", 0)
        # Pass the permissions explicitly: os.open() defaults to 0777,
        # which would create every new archive with the executable bits
        # set. 0x1B6 is 0666 octal (rw-rw-rw-, still subject to the umask).
        self.fd = os.open(name, flags, 0x1B6)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to `size' bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the string `s' to the file descriptor."""
        os.write(self.fd, s)
311
class _Stream:
    """Class that serves as an adapter between TarFile and
    a stream-like object.  The stream-like object only
    needs to have a read() or write() method and is accessed
    blockwise.  Use of gzip or bzip2 compression is possible.
    A stream-like object could be for example: sys.stdin,
    sys.stdout, a socket, a tape device etc.

    _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file to open if no fileobj is given; when writing
                    gzip data it is also stored in the gzip header
        mode     -- "r" for reading, anything else is treated as writing
                    by the compression setup
        comptype -- "tar" (no compression), "gz", "bz2" or "*" to detect
                    the compression from the data that is read
        fileobj  -- object to read from or write to, or None
        bufsize  -- block size used for access to the underlying object

        Raises CompressionError if the required compression module is
        missing, ReadError if gzip data was requested but not found.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""      # raw data not yet written / not yet consumed
        self.pos      = 0       # position in the uncompressed stream
        self.closed   = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # `closed' is missing if __init__ failed before it was assigned.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: produce a raw deflate stream, the gzip header
        # and trailer are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
        is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
        done on it afterwards.
        """
        if self.closed:
            return

        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = ""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))
        finally:
            # Release a file we opened ourselves even if the final flush
            # failed, otherwise its descriptor would never be closed.
            if not self._extfileobj:
                self.fileobj.close()

            self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Consumes the gzip header, so that the next raw byte belongs to
        the deflate stream.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: the header is not compressed, so it has to be
            # skipped with the raw __read(). read() would push these bytes
            # through the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # zero-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # zero-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # header crc
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
        is forbidden.

        The data in between is read and thrown away. Returns the new
        position, raises StreamError when asked to go backwards.
        """
        if pos < self.pos:
            raise StreamError("seeking backwards is not allowed")

        blocks, remainder = divmod(pos - self.pos, self.bufsize)
        skipped = 0
        while skipped < blocks:
            self.read(self.bufsize)
            skipped += 1
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
        If size is not defined, return all bytes of the stream
        up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

        For compressed streams this is decompressed data; whatever was
        decompressed beyond `size' is kept in self.dbuf for the next call.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
        read another block from the stream.

        This is raw data as stored in the underlying object; fewer than
        size bytes are returned once that object is exhausted.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
535
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
    detection for the Stream interface (mode 'r|*').

    The first block of the wrapped object is read ahead, inspected by
    getcomptype() and handed out again by the first call to read().
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block into self.buf."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead, ignoring `size'.

        Only the first call ends up here: the method replaces itself on
        the instance with the read() of the wrapped object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar", judging by the first block."""
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # "BZh" is followed by the block size digit ("1" to "9") and then
        # by the magic of the first compressed block. Comparing against
        # "BZh91" alone would miss everything that was not compressed
        # with the largest block size.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped object."""
        self.fileobj.close()
# class StreamProxy
559
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
    support for "r:bz2" and "w:bz2" modes. This is actually
    a workaround for a limitation in bz2 module's BZ2File
    class which (unlike gzip.GzipFile) has no support for
    a file object argument.
    """

    blocksize = 16 * 1024       # amount of raw data fetched per read

    def __init__(self, fileobj, mode):
        """Wrap fileobj; mode "r" decompresses, any other mode compresses."""
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)start at position 0 with a fresh (de)compressor."""
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of decompressed data."""
        chunks = [self.buf]
        have = len(self.buf)
        while have < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # The underlying file is exhausted. Without this check a
                # truncated or empty file would keep the loop spinning
                # forever, because decompressing "" yields no data and
                # does not raise EOFError either.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # data found after the end of the bz2 stream
                break
            chunks.append(data)
            have += len(data)
        self.buf = "".join(chunks)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to pos in the decompressed data.

        Going backwards restarts decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the position in the uncompressed data."""
        return self.pos

    def write(self, data):
        """Compress data and pass the result on to the wrapped object."""
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush pending compressed data and close the wrapped object."""
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
622
#------------------------
# Extraction file object
#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
    Is returned by TarFile.extractfile(). Support for
    sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up access to the member `tarinfo' of the archive `tarfile'.

        The archive's file object is shared; it is repositioned before
        every read.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        # read() is bound per instance to the variant that fits the member.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
        read a whole line. readline() and read() must not
        be mixed up (!).

        A complete line is returned with its line end normalized to
        "\\n" (carriage returns directly in front of it are removed).
        Returns "" at the end of the member.
        """
        unlimited = size < 0

        nl = self.linebuffer.find("\n")
        if nl >= 0:
            if not unlimited and nl > size:
                # The buffered line is longer than requested: hand out
                # the first `size' characters only. Cutting at nl = size
                # and then skipping a "newline" would lose a character
                # and append a line end that is not in the data.
                part = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                return part
        else:
            budget = size - len(self.linebuffer)    # unused if unlimited
            while nl < 0 and (unlimited or budget > 0):
                if unlimited:
                    want = 100
                else:
                    want = min(budget, 100)
                buf = self.read(want)
                if not buf:
                    break
                self.linebuffer += buf
                budget -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                # no line end before EOF or the size limit
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

        Returns at most size bytes (everything that is left if size is
        None). Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

        Returns at most size bytes (everything that is left if size is
        None). Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.

        Returns "" if there is no section at the current position.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # never read beyond the end of the section
        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # data section: translate to the position inside the archive
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # any other section is delivered as a run of NULs
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

        whence is 0 (from the start), 1 (from the current position) or
        2 (from the end). The result is kept within 0 and the size of the
        member, and buffered line data is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
782
#------------------
# Exported Classes
#------------------
class TarInfo(object):
    """Informational class which holds the details about an
    archive member given by a tar header block.
    TarInfo objects are returned by TarFile.getmember(),
    TarFile.getmembers() and TarFile.gettarinfo() and are
    usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.
        """

        self.name = name        # member name (dirnames must end with '/')
        self.mode = 0x1B6       # file permissions (0666 octal)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number
        self.prefix = ""        # prefix to filename or information
                                # about sparse files

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

        Raises ValueError if the buffer has the wrong length, consists
        of NULs only, contains a malformed number field or does not
        match its checksum.
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = buf[0:100].rstrip(NUL)
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = buf[157:257].rstrip(NUL)
        tarinfo.uname = buf[265:297].rstrip(NUL)
        tarinfo.gname = buf[297:329].rstrip(NUL)
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        # kept with its padding, the field may hold sparse information
        tarinfo.prefix = buf[345:500]

        # either the unsigned or the signed checksum is accepted
        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header block as a 512 byte string.

        If posix is true, number fields are restricted to the octal
        encoding and itn() raises ValueError for values that do not fit.
        The block is also remembered in self.buf.
        """
        parts = [
            stn(self.name, 100),
            itn(self.mode & 0xFFF, 8, posix),   # 07777: permission bits only
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            # Checksum field. It has to be eight characters wide, or all
            # the following fields miss the offsets frombuf() reads them
            # from (the type is expected at 156). It is blank while the
            # checksum is calculated.
            " " * 8,
            self.type,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(self.prefix, 155)
        ]

        # struct.pack() fills the block up with NULs to BLOCKSIZE
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf)[0]
        # six octal digits and a NUL; the eighth character stays a blank
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        self.buf = buf
        return buf

    def isreg(self):
        """Return True if the member is some kind of regular file."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member is a directory."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member is a symbolic link."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member is a link (inside the tarfile)."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member is a character device."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member is a block device."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member is a fifo."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member is a GNU sparse file."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member is a character device, block
        device or fifo."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
900
901class TarFile(object):
902 """The TarFile Class provides an interface to tar archives.
903 """
904
    # Class-level defaults; each of them can be overridden per instance
    # or in a subclass.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    posix = False               # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)

    # NOTE(review): presumably the class that extractfile() instantiates
    # for archive members -- confirm against extractfile().
    fileobject = ExFileObject
921
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'.

       `mode' is either 'r' to read from an existing archive, 'a' to
       append data to an existing file or 'w' to create a new file
       overwriting an existing one. `mode' defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       Raises ValueError if `mode' is not exactly 'r', 'a' or 'w'.
    """
    self.name = name

    # Compare against the exact set of modes. A substring test such as
    # `mode not in "raw"' lets the empty string through, which then fails
    # below with a confusing KeyError instead of a ValueError.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = _open(self.name, self.mode)
        self._extfileobj = False
    else:
        if self.name is None and hasattr(fileobj, "name"):
            self.name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.fileobj = fileobj

    # Init datastructures
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = 0         # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self._mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self._mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                try:
                    tarinfo = self.next()
                except ReadError:
                    self.fileobj.seek(0)
                    break
                if tarinfo is None:
                    self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self._mode in ("a", "w"):
            self._loaded = True
    except:
        # The initial scan failed. Do not leak a file that was opened
        # here; a file object supplied by the caller is left alone.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
977
#--------------------------------------------------------------------------
# Below are the classmethods which act as alternate constructors to the
# TarFile class. The open() method is the only one that is needed for
# public use; it is the "super"-constructor and is able to select an
# adequate "sub"-constructor for a particular compression using the mapping
# from OPEN_METH.
#
# This concept allows one to subclass TarFile without losing the comfort of
# the super-constructor. A sub-constructor is registered and made available
# by adding it to the mapping in OPEN_METH.
988
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered *open() method
       can read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # A failed attempt may have consumed data from a caller-supplied
        # file object, so remember its position and rewind before the
        # next attempt.
        rewind = (fileobj is not None and hasattr(fileobj, "tell")
                  and hasattr(fileobj, "seek"))
        if rewind:
            saved_pos = fileobj.tell()
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if rewind:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact match: a substring test would accept "rw" and only fail
        # after the stream had already been created.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream)
        except:
            # The stream is owned by this method; release it on failure.
            stream.close()
            raise
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001056
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else (including
       the empty string) raises ValueError.
    """
    # Tuple membership instead of a substring test, which would accept "".
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1064
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None when `fileobj' is given. Raises ValueError for
       a mode other than 'r' or 'w', CompressionError if the gzip module
       is unavailable and ReadError if the data is not gzip compressed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the contained tar file from the archive name
    # (foo.tgz -> foo.tar, foo.tar.gz -> foo.tar). Without a name there
    # is nothing to derive; os.path.splitext(None) would raise.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = _open(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        try:
            t = cls.taropen(tarname, mode,
                gzip.GzipFile(name, mode, compresslevel, fileobj)
            )
        except IOError:
            raise ReadError("not a gzip file")
    except:
        # Do not leak the file opened above when the archive is rejected.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1101
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None when `fileobj' is given. Raises ValueError for
       a mode other than 'r' or 'w', CompressionError if the bz2 module
       is unavailable and ReadError if the data is not bzip2 compressed.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the contained tar file from the archive name
    # (foo.tbz2 -> foo.tar, foo.tar.bz2 -> foo.tar). Without a name there
    # is nothing to derive; os.path.splitext(None) would raise.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if not opened_here:
        fileobj = _BZ2Proxy(fileobj, mode)
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

    try:
        try:
            t = cls.taropen(tarname, mode, fileobj)
        except IOError:
            raise ReadError("not a bzip2 file")
    except:
        # Only the BZ2File created above is ours to close; a proxy wraps
        # the caller's file object and is left untouched.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1134
# All *open() methods are registered here.
# Maps a compression type (the part after ':' in open()'s mode string,
# "tar" when that part is empty) to the name of the classmethod that
# opens archives of that type. open() looks the method up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1141
#--------------------------------------------------------------------------
# The public methods which TarFile provides:
1144
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed only if it was opened by TarFile
       itself, and this happens even when writing the trailer fails.
    """
    if self.closed:
        return

    try:
        if self._mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file even if the trailer could not be written,
        # otherwise a failed write would leak the open handle.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1164
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised when the archive has no such member. Should a
       member occur more than once in the archive, its last occurrence
       is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001175
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in the
       same order as they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires scanning the whole archive first.
    self._load()
    return self.members
1185
def getnames(self):
    """Return the names of the archive's members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001191
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that cannot be archived.
       Only members of regular-file type get a non-zero size; hard links
       to already archived files carry no data and are given size 0.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and \
                statres.st_nlink > 1 and inode in self.inodes:
            # Is it a hardlink to an already
            # archived file?
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        ftype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        ftype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        ftype = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Decide on the archive member type, not on the stat mode: a hard
    # link is a regular file on disk but is stored as a header without
    # data blocks, so its recorded size must be zero.
    if ftype == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = ftype
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1291
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.

       One line is printed per member. In verbose mode the columns are:
       mode string, owner/group (names if set, numeric ids otherwise),
       size (or "major,minor" for character and block devices),
       modification time in local time, name and, for links, the target.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # The trailing commas keep all fields of a member on one line.
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            if tarinfo.ischr() or tarinfo.isblk():
                # Device nodes have no size; show "major,minor" instead.
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the line for this member.
        print
1320
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of unsupported type are skipped
       with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = _open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if addfile() fails, so that a
            # write error does not leak the open handle.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1372
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       Raises ValueError in posix mode if the member is too large or its
       name or linkname does not fit into the header. Note that `tarinfo'
       is modified in place (name and linkname are normalized and may be
       shortened) and is appended to self.members.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    # Oversized members are an error in posix mode; otherwise only a
    # debug message is emitted here.
    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        else:
            self._dbg(2, "tarfile: Created GNU tar largefile header")


    # Overlong link names: an error in posix mode, otherwise the full
    # name is passed to _create_gnulong() and the header field is cut.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        else:
            self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
            tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last '/' within the first
            # LENGTH_PREFIX + 1 characters into prefix and name.
            prefix = tarinfo.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            name = tarinfo.name[len(prefix):]
            # Drop the trailing '/' from the prefix.
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header; self.offset is advanced by one block for it.
    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Pad the data with NULs up to a multiple of BLOCKSIZE.
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001437
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while fixing up a directory is re-raised
       only if self.errorlevel > 1; otherwise it is reported via _dbg().
    """
    directories = []

    if members is None:
        members = self

    # rwxrwxrwx, written with stat constants rather than an octal literal.
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda a: a.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable here: reassigning `path' would make
        # every following directory be joined onto the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1478
def extract(self, member, path=""):
    """Extract one member below the current working directory, or below
       `path' if it is given, using the member's full name. `member' may
       be a filename or a TarInfo object. File information is restored
       as accurately as possible.
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # makelink() needs to know where the hard link's target lives.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1511
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may be
       a filename or a TarInfo object. For a regular file (or a member of
       unknown type) a file-like object is returned; for a link, the
       file-like object of the link's target is returned; for anything
       else the result is None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type which are treated
    # like regular files, are served directly.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Members without associated data (directory, chrdev, blkdev,
    # etc.) have no file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A (sym)link cannot be resolved on a non-seekable stream of
    # tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1550
def _extract_member(self, tarinfo, targetpath):
    """Write the member described by `tarinfo' to the filesystem as
       `targetpath', creating missing parent directories first.
    """
    # Build the destination pathname, replacing forward slashes
    # with platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        parent = TarInfo()
        parent.name = upperdirs
        parent.type = DIRTYPE
        parent.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        parent.mtime = tarinfo.mtime
        parent.uid = tarinfo.uid
        parent.gid = tarinfo.gid
        parent.uname = tarinfo.uname
        parent.gname = tarinfo.gname
        try:
            self._extract_member(parent, parent.name)
        except:
            # Best effort only; failures here are ignored.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Pick the make*() method matching the member's type.
    if tarinfo.isreg():
        maker = self.makefile
    elif tarinfo.isdir():
        maker = self.makedir
    elif tarinfo.isfifo():
        maker = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        maker = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        maker = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        maker = self.makeunknown
    else:
        maker = self.makefile
    maker(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1603
#--------------------------------------------------------------------------
# Below are the different file methods. They are called via
# _extract_member() when extract() is called. They can be replaced in a
# subclass to implement other functionality.
1608
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing
       directory is not an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1617
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is copied from the archive into a newly
       created file. Both file objects are closed even if opening the
       target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = _open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1626
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were
       a regular file and report that via _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file.") % tarinfo.type
    self._dbg(1, note)
1634
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. ExtractError is raised on
       platforms where os.mkfifo is not available.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001642
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'.
       ExtractError is raised on platforms lacking os.mknod or
       os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Everything that is not a block device becomes a character device.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1657
def makelink(self, tarinfo, targetpath):
    """Create a hard or symbolic link at `targetpath'. Where links
       cannot be created (platform limitation), fall back to a copy
       of the file the link refers to.
    """
    linkpath = tarinfo.linkname
    try:
        if not tarinfo.issym():
            # The link target was prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(linkpath, targetpath)
    except AttributeError:
        if tarinfo.issym():
            # Interpret the target relative to the link's own directory.
            linkpath = normpath(
                os.path.join(os.path.dirname(tarinfo.name), linkpath))

        try:
            # First try to extract the referenced member in place
            # of the link ...
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            # ... then fall back to copying it from the filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001684
def chown(self, tarinfo, targetpath):
    """Change the owner of `targetpath' to the one recorded in
       `tarinfo'. Nothing is done unless running as root.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name, then by numeric id, and as a last
    # resort use the current process' group.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Same lookup order for the user.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001712
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits recorded in `tarinfo' to
       `targetpath'. Does nothing where os.chmod is unavailable.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001721
def utime(self, tarinfo, targetpath):
    """Apply the modification time recorded in `tarinfo' to
       `targetpath'. Does nothing where os.utime is unavailable.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001735
1736 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError if a header at offset 0 cannot be parsed and
       ignore_zeros is not set. Every member returned here, other than
       a pending self.firstmember, is also appended to self.members.
    """
    self._check("ra")
    # A member that was already read ahead (see __init__) is handed out
    # first, exactly once.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # A failure at the very start is reported as an error;
                # anywhere else it is treated as the end of the archive.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
                                         tarinfo.name))

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1796
1797 #--------------------------------------------------------------------------
# The following methods are called depending on the type of a member.
# The entry point is proc_member(), which is called with a TarInfo
# object created from the header block at the current offset.
# proc_member() can be overridden in a subclass to add custom proc_*()
# methods. A proc_*() method MUST implement the following operations:
# 1. Set tarinfo.offset_data to the position where the data blocks
#    begin, if there is data that follows.
# 2. Set self.offset to the position where the next member's header
#    will begin.
# 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that matches its type.

    GNU longname/longlink headers go to proc_gnulong(), GNU sparse
    headers to proc_sparse(), and everything else to proc_builtin().
    Returns whatever the selected method returns.
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1819
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type.

    Unknown types (those not in SUPPORTED_TYPES) are handled like
    regular files.  Records where the member's data starts and, for
    regular/unknown members, advances self.offset past the data
    blocks.  Returns tarinfo.
    """
    tarinfo.offset_data = self.offset
    has_data_blocks = (tarinfo.isreg() or
                       tarinfo.type not in SUPPORTED_TYPES)
    if has_data_blocks:
        # Jump over the data, rounded up to whole blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001829
def proc_gnulong(self, tarinfo):
    """Process a GNU longname or longlink member.

    Reads the blocks that hold the long name, then reads and processes
    the header that follows and patches the long name (or link name)
    into the resulting TarInfo object, which is returned.  The returned
    object's offset is that of the longname/longlink header.
    """
    # Collect the name blocks; tarinfo.size is the length of the name.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longname = "".join(chunks)

    # The header of the member the long name belongs to comes next.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # The combined member begins where the longname header began.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longname.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longname.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001858
1859 def proc_sparse(self, tarinfo):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001860 """Process a GNU sparse header plus extra headers.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001861 """
Thomas Wouters477c8d52006-05-27 19:21:47 +00001862 buf = tarinfo.buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001863 sp = _ringbuffer()
1864 pos = 386
1865 lastpos = 0L
1866 realpos = 0L
1867 # There are 4 possible sparse structs in the
1868 # first header.
1869 for i in xrange(4):
1870 try:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001871 offset = nti(buf[pos:pos + 12])
1872 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001873 except ValueError:
1874 break
1875 if offset > lastpos:
1876 sp.append(_hole(lastpos, offset - lastpos))
1877 sp.append(_data(offset, numbytes, realpos))
1878 realpos += numbytes
1879 lastpos = offset + numbytes
1880 pos += 24
1881
1882 isextended = ord(buf[482])
Thomas Wouters477c8d52006-05-27 19:21:47 +00001883 origsize = nti(buf[483:495])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001884
1885 # If the isextended flag is given,
1886 # there are extra headers to process.
1887 while isextended == 1:
1888 buf = self.fileobj.read(BLOCKSIZE)
1889 self.offset += BLOCKSIZE
1890 pos = 0
1891 for i in xrange(21):
1892 try:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001893 offset = nti(buf[pos:pos + 12])
1894 numbytes = nti(buf[pos + 12:pos + 24])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001895 except ValueError:
1896 break
1897 if offset > lastpos:
1898 sp.append(_hole(lastpos, offset - lastpos))
1899 sp.append(_data(offset, numbytes, realpos))
1900 realpos += numbytes
1901 lastpos = offset + numbytes
1902 pos += 24
1903 isextended = ord(buf[504])
1904
1905 if lastpos < origsize:
1906 sp.append(_hole(lastpos, origsize - lastpos))
1907
1908 tarinfo.sparse = sp
1909
1910 tarinfo.offset_data = self.offset
1911 self.offset += self._block(tarinfo.size)
1912 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001913
Thomas Wouters477c8d52006-05-27 19:21:47 +00001914 # Clear the prefix field so that it is not used
1915 # as a pathname in next().
1916 tarinfo.prefix = ""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001917
Thomas Wouters477c8d52006-05-27 19:21:47 +00001918 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001919
1920 #--------------------------------------------------------------------------
1921 # Little helper methods:
1922
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    whole, leftover = divmod(count, BLOCKSIZE)
    if leftover:
        # A partially filled block still occupies a full block.
        return (whole + 1) * BLOCKSIZE
    return whole * BLOCKSIZE
1931
def _getmember(self, name, tarinfo=None):
    """Return the last member called name, or None if there is none.

    The member list is searched from bottom to top.  If tarinfo is
    given, only the members that precede it are considered.
    """
    # getmembers() makes sure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        stop = len(members)
    else:
        stop = members.index(tarinfo)

    for candidate in reversed(members[:stop]):
        if name == candidate.name:
            return candidate
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001947
def _load(self):
    """Read through the whole archive so that every readable member
       has been seen, then mark the TarFile as loaded.
    """
    # next() returns None once no further member can be read.
    while self.next() is not None:
        pass
    self._loaded = True
1957
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when mode is given,
       if the TarFile's own mode is not one of the characters in mode.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        # No particular mode is required for this operation.
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001966
def __iter__(self):
    """Return an iterator over the archive's members.

    Once all members have been loaded this is a plain iterator over
    the member list; before that a TarIter is returned.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1974
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

    The member consists of an extended tar header whose size field is
    the length of the NUL-terminated name, followed by the data blocks
    holding that name.  self.offset is advanced by everything written.
    """
    payload = name + NUL
    size = len(payload)

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = size

    # Extended header first ...
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE

    # ... then the name itself, NUL-padded up to a full block.
    self.fileobj.write(payload)
    padding = -size % BLOCKSIZE
    if padding:
        self.fileobj.write(NUL * padding)
    self.offset += size + padding
1999
def _dbg(self, level, msg):
    """Write msg to sys.stderr unless level is above self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2005# class TarFile
2006
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for the given TarFile object.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, raising StopIteration at the end.

        While the TarFile is not fully loaded, members come from its
        next() method; when that is exhausted the TarFile is marked as
        _loaded.
        """
        # Fix for SF #1100429: under rare circumstances getmembers()
        # is called during iteration, which loads the whole archive.
        # From then on the members are taken from the list by index so
        # that the iteration does not stop prematurely.
        if self.tarfile._loaded:
            try:
                member = self.tarfile.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = self.tarfile.next()
            if not member:
                self.tarfile._loaded = True
                raise StopIteration
        self.index += 1
        return member
2042
2043# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of size bytes that
       starts at offset.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True if offset lies in the half-open range
        # [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
2052
class _data(_section):
    """A section of a sparse file that holds real data.
    """
    def __init__(self, offset, size, realpos):
        # realpos is kept alongside the offset and size handled by
        # the base class.
        self.realpos = realpos
        _section.__init__(self, offset, size)
2059
class _hole(_section):
    """A section of a sparse file that is a hole; it adds nothing to
       _section and exists only to tell holes apart from data.
    """
2064
class _ringbuffer(list):
    """List of sections that remembers where the last successful lookup
       ended, so that a following lookup starts from that position
       instead of from the beginning.
    """
    def __init__(self):
        list.__init__(self)
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching circularly from the last
           hit, for which ``offset in item`` is true.  Return None if
           no item contains offset or if the buffer is empty.
        """
        if not self:
            # Without this check self[idx] below raises IndexError on
            # an empty buffer instead of reporting "not found".
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                # Wrap around to the start of the list.
                idx = 0
            if idx == self.idx:
                # Back at the starting point: End of File.
                return None
        self.idx = idx
        return item
2085
2086#---------------------------------------------
2087# zipfile compatible TarFile class
2088#---------------------------------------------
# Compression constants understood by TarFileCompat.  As the trailing
# comments note, each one corresponds to a zipfile constant.
TAR_PLAIN = 0 # zipfile.ZIP_STORED
TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED).  Any other compression
           value raises ValueError.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[0:1] == "r":
            # Give every member the attribute names used by zipfile.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive's contents using TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None.
        """
        return

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add a member described by zinfo whose contents are bytes.

           zinfo's filename, file_size and date_time attributes are
           copied to the name, size and mtime attributes before zinfo
           is handed to TarFile.addfile().
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2135#class TarFileCompat
2136
2137#--------------------
2138# exported functions
2139#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again right away; a TarError
       raised while doing so means False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2150
# Export TarFile.open as the module-level open().  This rebinds the
# name ``open`` in this module's namespace, so the open(name) call in
# is_tarfile() above resolves to TarFile.open, not the builtin.
open = TarFile.open