blob: c185fbd49eb0bc577c58fa46f60bd2d6f4652843 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Thomas Wouters477c8d52006-05-27 19:21:47 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
if sys.platform == 'mac':
    # Classic MacOS (MacOS 9) is not supported.  Path handling throughout
    # this module assumes that swapping "/" for os.path.sep is enough to
    # convert a pathname, which does not hold for the mac
    # rooted:path:name versus :nonrooted:path:name syntax.
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                      # the null character (padding / terminator)
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Values of the one-character type field of a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
S_IFLNK = 0120000               # symbolic link
S_IFREG = 0100000               # regular file
S_IFBLK = 0060000               # block device
S_IFDIR = 0040000               # directory
S_IFCHR = 0020000               # character device
S_IFIFO = 0010000               # fifo

TSUID = 04000                   # set UID on execution
TSGID = 02000                   # set GID on execution
TSVTX = 01000                   # reserved (rendered as t/T by filemode())

TUREAD = 0400                   # read by owner
TUWRITE = 0200                  # write by owner
TUEXEC = 0100                   # execute/search by owner
TGREAD = 0040                   # read by group
TGWRITE = 0020                  # write by group
TGEXEC = 0010                   # execute/search by group
TOREAD = 0004                   # read by other
TOWRITE = 0002                  # write by other
TOEXEC = 0001                   # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000135
def stn(s, length):
    """Return s as a NUL-terminated field that is length bytes wide.

    s is cut to length - 1 characters if necessary, so the final byte
    of the field is always a NUL; shorter strings are NUL-padded.
    """
    payload = s[:length - 1]
    # A negative repeat count yields "", so over-long input gets no padding.
    padding = NUL * (length - 1 - len(s))
    return payload + padding + NUL
140
141def nti(s):
142 """Convert a number field to a python number.
143 """
144 # There are two possible encodings for a number field, see
145 # itn() below.
146 if s[0] != chr(0200):
147 n = int(s.rstrip(NUL) or "0", 8)
148 else:
149 n = 0L
150 for i in xrange(len(s) - 1):
151 n <<= 8
152 n += ord(s[i + 1])
153 return n
154
155def itn(n, digits=8, posix=False):
156 """Convert a python number to a number field.
157 """
158 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
159 # octal digits followed by a null-byte, this allows values up to
160 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
161 # that if necessary. A leading 0200 byte indicates this particular
162 # encoding, the following digits-1 bytes are a big-endian
163 # representation. This allows values up to (256**(digits-1))-1.
164 if 0 <= n < 8 ** (digits - 1):
165 s = "%0*o" % (digits - 1, n) + NUL
166 else:
167 if posix:
168 raise ValueError("overflow in number field")
169
170 if n < 0:
171 # XXX We mimic GNU tar's behaviour with negative numbers,
172 # this could raise OverflowError.
173 n = struct.unpack("L", struct.pack("l", n))[0]
174
175 s = ""
176 for i in xrange(digits - 1):
177 s = chr(n & 0377) + s
178 n >>= 8
179 s = chr(0200) + s
180 return s
181
def calc_chksums(buf):
    """Return (unsigned_chksum, signed_chksum) for a header block.

    All bytes of the 512 byte header are summed up except for the
    8 byte chksum field at offset 148, which counts as if it was
    filled with spaces (hence the constant 256 == 8 * ord(" ")).
    According to the GNU tar sources, some tars (Sun and NeXT)
    calculate chksum with signed chars, which gives a different
    result if bytes with the high bit set are present, so both
    variants are computed.
    """
    header = buf[:512]
    # 148 bytes, 8 skipped bytes (the chksum field), 356 bytes.
    layout = "148%s8x356%s"
    unsigned_chksum = 256 + sum(struct.unpack(layout % ("B", "B"), header))
    signed_chksum = 256 + sum(struct.unpack(layout % ("b", "b"), header))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000194
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       were copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def transfer(nbytes):
        # Move exactly nbytes; a short read means src ended too early.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_chunks, tail = divmod(length, BUFSIZE)
    copied = 0
    while copied < full_chunks:
        transfer(BUFSIZE)
        copied += 1
    if tail != 0:
        transfer(tail)
    return
219
# Lookup table used by filemode().  Each inner tuple describes one
# character of the resulting string; its (bits, char) pairs are tried in
# order and the first pair whose bits are all set in the mode wins.
filemode_table = (
    # file type character
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner permissions
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group permissions
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other permissions
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000246
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # "-" stays if none of the candidates for this position matches.
        symbol = "-"
        for bit, char in candidates:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000261
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
266
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
282
283#---------------------------
284# internal stream interface
285#---------------------------
286class _LowLevelFile:
287 """Low-level file object. Supports reading and writing.
288 It is used instead of a regular file object for streaming
289 access.
290 """
291
292 def __init__(self, name, mode):
293 mode = {
294 "r": os.O_RDONLY,
295 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
296 }[mode]
297 if hasattr(os, "O_BINARY"):
298 mode |= os.O_BINARY
299 self.fd = os.open(name, mode)
300
301 def close(self):
302 os.close(self.fd)
303
304 def read(self, size):
305 return os.read(self.fd, size)
306
307 def write(self, s):
308 os.write(self.fd, s)
309
310class _Stream:
311 """Class that serves as an adapter between TarFile and
312 a stream-like object. The stream-like object only
313 needs to have a read() or write() method and is accessed
314 blockwise. Use of gzip or bzip2 compression is possible.
315 A stream-like object could be for example: sys.stdin,
316 sys.stdout, a socket, a tape device etc.
317
318 _Stream is intended to be used only internally.
319 """
320
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000321 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000322 """Construct a _Stream object.
323 """
324 self._extfileobj = True
325 if fileobj is None:
326 fileobj = _LowLevelFile(name, mode)
327 self._extfileobj = False
328
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000329 if comptype == '*':
330 # Enable transparent compression detection for the
331 # stream interface
332 fileobj = _StreamProxy(fileobj)
333 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000334
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000335 self.name = name or ""
336 self.mode = mode
337 self.comptype = comptype
338 self.fileobj = fileobj
339 self.bufsize = bufsize
340 self.buf = ""
341 self.pos = 0L
342 self.closed = False
343
344 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000345 try:
346 import zlib
347 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000348 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000349 self.zlib = zlib
350 self.crc = zlib.crc32("")
351 if mode == "r":
352 self._init_read_gz()
353 else:
354 self._init_write_gz()
355
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000356 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000357 try:
358 import bz2
359 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000360 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000361 if mode == "r":
362 self.dbuf = ""
363 self.cmp = bz2.BZ2Decompressor()
364 else:
365 self.cmp = bz2.BZ2Compressor()
366
367 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000368 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000369 self.close()
370
371 def _init_write_gz(self):
372 """Initialize for writing with gzip compression.
373 """
374 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
375 -self.zlib.MAX_WBITS,
376 self.zlib.DEF_MEM_LEVEL,
377 0)
378 timestamp = struct.pack("<L", long(time.time()))
379 self.__write("\037\213\010\010%s\002\377" % timestamp)
380 if self.name.endswith(".gz"):
381 self.name = self.name[:-3]
382 self.__write(self.name + NUL)
383
384 def write(self, s):
385 """Write string s to the stream.
386 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000387 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000388 self.crc = self.zlib.crc32(s, self.crc)
389 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000391 s = self.cmp.compress(s)
392 self.__write(s)
393
394 def __write(self, s):
395 """Write string s to the stream if a whole new block
396 is ready to be written.
397 """
398 self.buf += s
399 while len(self.buf) > self.bufsize:
400 self.fileobj.write(self.buf[:self.bufsize])
401 self.buf = self.buf[self.bufsize:]
402
403 def close(self):
404 """Close the _Stream object. No operation should be
405 done on it afterwards.
406 """
407 if self.closed:
408 return
409
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000411 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000412
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000413 if self.mode == "w" and self.buf:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000414 blocks, remainder = divmod(len(self.buf), self.bufsize)
415 if remainder > 0:
416 self.buf += NUL * (self.bufsize - remainder)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000417 self.fileobj.write(self.buf)
418 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000419 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000420 # The native zlib crc is an unsigned 32-bit integer, but
421 # the Python wrapper implicitly casts that to a signed C
422 # long. So, on a 32-bit box self.crc may "look negative",
423 # while the same crc on a 64-bit box may "look positive".
424 # To avoid irksome warnings from the `struct` module, force
425 # it to look positive on all boxes.
426 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000427 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000428
429 if not self._extfileobj:
430 self.fileobj.close()
431
432 self.closed = True
433
434 def _init_read_gz(self):
435 """Initialize for reading a gzip compressed fileobj.
436 """
437 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
438 self.dbuf = ""
439
440 # taken from gzip.GzipFile with some alterations
441 if self.__read(2) != "\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000442 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000443 if self.__read(1) != "\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000444 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000445
446 flag = ord(self.__read(1))
447 self.__read(6)
448
449 if flag & 4:
450 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
451 self.read(xlen)
452 if flag & 8:
453 while True:
454 s = self.__read(1)
455 if not s or s == NUL:
456 break
457 if flag & 16:
458 while True:
459 s = self.__read(1)
460 if not s or s == NUL:
461 break
462 if flag & 2:
463 self.__read(2)
464
465 def tell(self):
466 """Return the stream's file pointer position.
467 """
468 return self.pos
469
470 def seek(self, pos=0):
471 """Set the stream's file pointer to pos. Negative seeking
472 is forbidden.
473 """
474 if pos - self.pos >= 0:
475 blocks, remainder = divmod(pos - self.pos, self.bufsize)
476 for i in xrange(blocks):
477 self.read(self.bufsize)
478 self.read(remainder)
479 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000480 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000481 return self.pos
482
483 def read(self, size=None):
484 """Return the next size number of bytes from the stream.
485 If size is not defined, return all bytes of the stream
486 up to EOF.
487 """
488 if size is None:
489 t = []
490 while True:
491 buf = self._read(self.bufsize)
492 if not buf:
493 break
494 t.append(buf)
495 buf = "".join(t)
496 else:
497 buf = self._read(size)
498 self.pos += len(buf)
499 return buf
500
501 def _read(self, size):
502 """Return size bytes from the stream.
503 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000504 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000505 return self.__read(size)
506
507 c = len(self.dbuf)
508 t = [self.dbuf]
509 while c < size:
510 buf = self.__read(self.bufsize)
511 if not buf:
512 break
513 buf = self.cmp.decompress(buf)
514 t.append(buf)
515 c += len(buf)
516 t = "".join(t)
517 self.dbuf = t[size:]
518 return t[:size]
519
520 def __read(self, size):
521 """Return size bytes from stream. If internal buffer is empty,
522 read another block from the stream.
523 """
524 c = len(self.buf)
525 t = [self.buf]
526 while c < size:
527 buf = self.fileobj.read(self.bufsize)
528 if not buf:
529 break
530 t.append(buf)
531 c += len(buf)
532 t = "".join(t)
533 self.buf = t[size:]
534 return t[:size]
535# class _Stream
536
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000537class _StreamProxy(object):
538 """Small proxy class that enables transparent compression
539 detection for the Stream interface (mode 'r|*').
540 """
541
542 def __init__(self, fileobj):
543 self.fileobj = fileobj
544 self.buf = self.fileobj.read(BLOCKSIZE)
545
546 def read(self, size):
547 self.read = self.fileobj.read
548 return self.buf
549
550 def getcomptype(self):
551 if self.buf.startswith("\037\213\010"):
552 return "gz"
553 if self.buf.startswith("BZh91"):
554 return "bz2"
555 return "tar"
556
557 def close(self):
558 self.fileobj.close()
559# class StreamProxy
560
Thomas Wouters477c8d52006-05-27 19:21:47 +0000561class _BZ2Proxy(object):
562 """Small proxy class that enables external file object
563 support for "r:bz2" and "w:bz2" modes. This is actually
564 a workaround for a limitation in bz2 module's BZ2File
565 class which (unlike gzip.GzipFile) has no support for
566 a file object argument.
567 """
568
569 blocksize = 16 * 1024
570
571 def __init__(self, fileobj, mode):
572 self.fileobj = fileobj
573 self.mode = mode
574 self.init()
575
576 def init(self):
577 import bz2
578 self.pos = 0
579 if self.mode == "r":
580 self.bz2obj = bz2.BZ2Decompressor()
581 self.fileobj.seek(0)
582 self.buf = ""
583 else:
584 self.bz2obj = bz2.BZ2Compressor()
585
586 def read(self, size):
587 b = [self.buf]
588 x = len(self.buf)
589 while x < size:
590 try:
591 raw = self.fileobj.read(self.blocksize)
592 data = self.bz2obj.decompress(raw)
593 b.append(data)
594 except EOFError:
595 break
596 x += len(data)
597 self.buf = "".join(b)
598
599 buf = self.buf[:size]
600 self.buf = self.buf[size:]
601 self.pos += len(buf)
602 return buf
603
604 def seek(self, pos):
605 if pos < self.pos:
606 self.init()
607 self.read(pos - self.pos)
608
609 def tell(self):
610 return self.pos
611
612 def write(self, data):
613 self.pos += len(data)
614 raw = self.bz2obj.compress(data)
615 self.fileobj.write(raw)
616
617 def close(self):
618 if self.mode == "w":
619 raw = self.bz2obj.flush()
620 self.fileobj.write(raw)
621 self.fileobj.close()
622# class _BZ2Proxy
623
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000624#------------------------
625# Extraction file object
626#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member tarinfo of the archive
           tarfile. The data is read from tarfile.fileobj.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the data in fileobj
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # read-ahead used by readline()
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Carriage returns directly in front of the newline are
           removed. An empty string is returned at the end of the
           member.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl > size:
            # The buffered line is longer than requested: return just the
            # first size characters.  Nothing is dropped and no newline
            # is made up; the rest of the line stays in the buffer.
            s = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return s
        if nl < 0:
            # No complete line buffered yet, fetch more data.
            size -= len(self.linebuffer)
            while (nl < 0 and size > 0):
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                s = self.linebuffer
                self.linebuffer = ""
                return s
        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Returns at most size bytes; everything up to the end of
           the member if size is None or negative. Raises ValueError
           if the file is closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        # A negative size must not reach min(): it would move self.pos
        # backwards and make fileobj read beyond the end of the member.
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           Same contract as for regular files; holes are returned
           as NUL bytes.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # Never read across the end of the current section.
        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data section: translate the position to the place where
            # the section is stored in the archive.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is a hole, it consists of NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (absolute), 1 (relative to the current
           position) or 2 (relative to the end). The resulting
           position is limited to the range 0..size. Data buffered
           by readline() is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
783
784#------------------
785# Exported Classes
786#------------------
787class TarInfo(object):
788 """Informational class which holds the details about an
789 archive member given by a tar header block.
790 TarInfo objects are returned by TarFile.getmember(),
791 TarFile.getmembers() and TarFile.gettarinfo() and are
792 usually created internally.
793 """
794
795 def __init__(self, name=""):
796 """Construct a TarInfo object. name is the optional name
797 of the member.
798 """
799
Thomas Wouters477c8d52006-05-27 19:21:47 +0000800 self.name = name # member name (dirnames must end with '/')
801 self.mode = 0666 # file permissions
802 self.uid = 0 # user id
803 self.gid = 0 # group id
804 self.size = 0 # file size
805 self.mtime = 0 # modification time
806 self.chksum = 0 # header checksum
807 self.type = REGTYPE # member type
808 self.linkname = "" # link name
809 self.uname = "user" # user name
810 self.gname = "group" # group name
811 self.devmajor = 0 # device major number
812 self.devminor = 0 # device minor number
813 self.prefix = "" # prefix to filename or information
814 # about sparse files
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000815
Thomas Wouters477c8d52006-05-27 19:21:47 +0000816 self.offset = 0 # the tar header starts here
817 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000818
819 def __repr__(self):
820 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
821
Guido van Rossum75b64e62005-01-16 00:16:11 +0000822 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000823 def frombuf(cls, buf):
824 """Construct a TarInfo object from a 512 byte string buffer.
825 """
Thomas Wouters477c8d52006-05-27 19:21:47 +0000826 if len(buf) != BLOCKSIZE:
827 raise ValueError("truncated header")
828 if buf.count(NUL) == BLOCKSIZE:
829 raise ValueError("empty header")
830
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000831 tarinfo = cls()
Thomas Wouters477c8d52006-05-27 19:21:47 +0000832 tarinfo.buf = buf
833 tarinfo.name = buf[0:100].rstrip(NUL)
834 tarinfo.mode = nti(buf[100:108])
835 tarinfo.uid = nti(buf[108:116])
836 tarinfo.gid = nti(buf[116:124])
837 tarinfo.size = nti(buf[124:136])
838 tarinfo.mtime = nti(buf[136:148])
839 tarinfo.chksum = nti(buf[148:156])
840 tarinfo.type = buf[156:157]
841 tarinfo.linkname = buf[157:257].rstrip(NUL)
842 tarinfo.uname = buf[265:297].rstrip(NUL)
843 tarinfo.gname = buf[297:329].rstrip(NUL)
844 tarinfo.devmajor = nti(buf[329:337])
845 tarinfo.devminor = nti(buf[337:345])
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000846 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000847
Thomas Wouters477c8d52006-05-27 19:21:47 +0000848 if tarinfo.chksum not in calc_chksums(buf):
849 raise ValueError("invalid header")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000850 return tarinfo
851
Thomas Wouters477c8d52006-05-27 19:21:47 +0000852 def tobuf(self, posix=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000853 """Return a tar header block as a 512 byte string.
854 """
Thomas Wouters477c8d52006-05-27 19:21:47 +0000855 parts = [
856 stn(self.name, 100),
857 itn(self.mode & 07777, 8, posix),
858 itn(self.uid, 8, posix),
859 itn(self.gid, 8, posix),
860 itn(self.size, 12, posix),
861 itn(self.mtime, 12, posix),
862 " ", # checksum field
863 self.type,
864 stn(self.linkname, 100),
865 stn(MAGIC, 6),
866 stn(VERSION, 2),
867 stn(self.uname, 32),
868 stn(self.gname, 32),
869 itn(self.devmajor, 8, posix),
870 itn(self.devminor, 8, posix),
871 stn(self.prefix, 155)
872 ]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873
Thomas Wouters477c8d52006-05-27 19:21:47 +0000874 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
875 chksum = calc_chksums(buf)[0]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000876 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000877 self.buf = buf
878 return buf
879
880 def isreg(self):
881 return self.type in REGULAR_TYPES
882 def isfile(self):
883 return self.isreg()
884 def isdir(self):
885 return self.type == DIRTYPE
886 def issym(self):
887 return self.type == SYMTYPE
888 def islnk(self):
889 return self.type == LNKTYPE
890 def ischr(self):
891 return self.type == CHRTYPE
892 def isblk(self):
893 return self.type == BLKTYPE
894 def isfifo(self):
895 return self.type == FIFOTYPE
896 def issparse(self):
897 return self.type == GNUTYPE_SPARSE
898 def isdev(self):
899 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
900# class TarInfo
901
902class TarFile(object):
903 """The TarFile Class provides an interface to tar archives.
904 """
905
906 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
907
908 dereference = False # If true, add content of linked file to the
909 # tar file, else the link.
910
911 ignore_zeros = False # If true, skips empty or invalid blocks and
912 # continues processing.
913
914 errorlevel = 0 # If 0, fatal errors only appear in debug
915 # messages (if debug >= 0). If > 0, errors
916 # are passed to the caller as exceptions.
917
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000918 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000919 # archives (no GNU extensions!)
920
921 fileobject = ExFileObject
922
923 def __init__(self, name=None, mode="r", fileobj=None):
924 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
925 read from an existing archive, 'a' to append data to an existing
926 file or 'w' to create a new file overwriting an existing one. `mode'
927 defaults to 'r'.
928 If `fileobj' is given, it is used for reading or writing data. If it
929 can be determined, `mode' is overridden by `fileobj's mode.
930 `fileobj' is not closed, when TarFile is closed.
931 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000932 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000933
934 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000935 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000936 self._mode = mode
937 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
938
939 if not fileobj:
940 fileobj = file(self.name, self.mode)
941 self._extfileobj = False
942 else:
943 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000944 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000945 if hasattr(fileobj, "mode"):
946 self.mode = fileobj.mode
947 self._extfileobj = True
948 self.fileobj = fileobj
949
950 # Init datastructures
Thomas Wouters477c8d52006-05-27 19:21:47 +0000951 self.closed = False
952 self.members = [] # list of members as TarInfo objects
953 self._loaded = False # flag if all members have been read
954 self.offset = 0L # current position in the archive file
955 self.inodes = {} # dictionary caching the inodes of
956 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000957
958 if self._mode == "r":
959 self.firstmember = None
960 self.firstmember = self.next()
961
962 if self._mode == "a":
963 # Move to the end of the archive,
964 # before the first empty block.
965 self.firstmember = None
966 while True:
967 try:
968 tarinfo = self.next()
969 except ReadError:
970 self.fileobj.seek(0)
971 break
972 if tarinfo is None:
973 self.fileobj.seek(- BLOCKSIZE, 1)
974 break
975
976 if self._mode in "aw":
977 self._loaded = True
978
979 #--------------------------------------------------------------------------
980 # Below are the classmethods which act as alternate constructors to the
981 # TarFile class. The open() method is the only one that is needed for
982 # public use; it is the "super"-constructor and is able to select an
983 # adequate "sub"-constructor for a particular compression using the mapping
984 # from OPEN_METH.
985 #
986 # This concept allows one to subclass TarFile without losing the comfort of
987 # the super-constructor. A sub-constructor is registered and made available
988 # by adding it to the mapping in OPEN_METH.
989
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener accepts
       the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            # A failed attempt may already have consumed data from a
            # caller-supplied file object. Remember where it stood so
            # that the next opener starts from the same position.
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream was created here, so closing it is our job.
        t._extfileobj = False
        return t

    # Exact comparison: a substring test would also accept "".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001057
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Alternate constructor for plain, uncompressed tar archives.

       A `mode' that is longer than one character or not contained in
       "raw" raises ValueError; otherwise the class itself is called
       with the unchanged arguments.
    """
    acceptable = len(mode) <= 1 and mode in "raw"
    if not acceptable:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1065
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None when `fileobj' is given. Raises ValueError for
       an invalid `mode' or when there is nothing to open,
       CompressionError if the gzip module is unusable and ReadError if
       an IOError occurs while the archive is opened.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'")

    if name is None and fileobj is None:
        raise ValueError("nothing to open")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the contained tar archive from the file name
    # ("x.tgz" -> "x.tar", "x.tar.gz" -> "x.tar"). Without a name (only
    # a file object was passed) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    # When writing, the derived tar name is what is handed to GzipFile
    # as its file name.
    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Any other failure (e.g. gzip data that is not a tar archive)
        # must not leak the file opened above either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1102
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None when `fileobj' is given. Raises ValueError for
       an invalid `mode' or when there is nothing to open,
       CompressionError if the bz2 module is missing and ReadError if an
       IOError occurs while the archive is opened.
    """
    if len(mode) > 1 or mode not in "rw":
        raise ValueError("mode must be 'r' or 'w'.")

    if name is None and fileobj is None:
        raise ValueError("nothing to open")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the contained tar archive from the file name
    # ("x.tbz2" -> "x.tar", "x.tar.bz2" -> "x.tar"). Without a name (only
    # a file object was passed) there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        # Only a BZ2File created above is closed here; the proxy wraps
        # the caller's file object, which stays under their control.
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1135
# All *open() methods are registered here.
# Maps a compression type (the part after ':' in the mode passed to open())
# to the name of the classmethod that opens such an archive. open() looks
# the method up with getattr(), so subclasses can add their own entries.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1142
1143 #--------------------------------------------------------------------------
1144 # The public methods which TarFile provides:
1145
def close(self):
    """Finish and close the archive. Calling close() again is a no-op.

       For archives opened in 'a' or 'w' mode two zero blocks are written
       as end marker and the file is padded with zeros up to a multiple
       of RECORDSIZE. The underlying file object is only closed if it
       is not an external one.
    """
    if self.closed:
        return

    if self._mode in "aw":
        trailer = BLOCKSIZE * 2
        self.fileobj.write(NUL * trailer)
        self.offset += trailer
        # fill up the end with zero-blocks
        # (like option -b20 for tar does)
        leftover = self.offset % RECORDSIZE
        if leftover > 0:
            self.fileobj.write(NUL * (RECORDSIZE - leftover))

    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1165
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised if the lookup done by _getmember() yields
       None. If a member occurs more than once in the archive, its last
       occurrence is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001176
def getmembers(self):
    """Return all members of the archive as a list of TarInfo objects,
       in the order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list requires that the whole archive
    # has been scanned once.
    self._load()
    return self.members
1186
def getnames(self):
    """Return the names of all members as a list, in the same order
       as the list returned by getmembers().
    """
    return [member.name for member in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001192
1193 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1194 """Create a TarInfo object for either the file `name' or the file
1195 object `fileobj' (using os.fstat on its file descriptor). You can
1196 modify some of the TarInfo's attributes before you add it using
1197 addfile(). If given, `arcname' specifies an alternative name for the
1198 file in the archive.
1199 """
1200 self._check("aw")
1201
1202 # When fileobj is given, replace name by
1203 # fileobj's real name.
1204 if fileobj is not None:
1205 name = fileobj.name
1206
1207 # Building the name of the member in the archive.
1208 # Backward slashes are converted to forward slashes,
1209 # Absolute paths are turned to relative paths.
1210 if arcname is None:
1211 arcname = name
1212 arcname = normpath(arcname)
1213 drv, arcname = os.path.splitdrive(arcname)
1214 while arcname[0:1] == "/":
1215 arcname = arcname[1:]
1216
1217 # Now, fill the TarInfo object with
1218 # information specific for the file.
1219 tarinfo = TarInfo()
1220
1221 # Use os.stat or os.lstat, depending on platform
1222 # and if symlinks shall be resolved.
1223 if fileobj is None:
1224 if hasattr(os, "lstat") and not self.dereference:
1225 statres = os.lstat(name)
1226 else:
1227 statres = os.stat(name)
1228 else:
1229 statres = os.fstat(fileobj.fileno())
1230 linkname = ""
1231
1232 stmd = statres.st_mode
1233 if stat.S_ISREG(stmd):
1234 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001235 if not self.dereference and \
1236 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001237 # Is it a hardlink to an already
1238 # archived file?
1239 type = LNKTYPE
1240 linkname = self.inodes[inode]
1241 else:
1242 # The inode is added only if its valid.
1243 # For win32 it is always 0.
1244 type = REGTYPE
1245 if inode[0]:
1246 self.inodes[inode] = arcname
1247 elif stat.S_ISDIR(stmd):
1248 type = DIRTYPE
1249 if arcname[-1:] != "/":
1250 arcname += "/"
1251 elif stat.S_ISFIFO(stmd):
1252 type = FIFOTYPE
1253 elif stat.S_ISLNK(stmd):
1254 type = SYMTYPE
1255 linkname = os.readlink(name)
1256 elif stat.S_ISCHR(stmd):
1257 type = CHRTYPE
1258 elif stat.S_ISBLK(stmd):
1259 type = BLKTYPE
1260 else:
1261 return None
1262
1263 # Fill the TarInfo object with all
1264 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001265 tarinfo.name = arcname
1266 tarinfo.mode = stmd
1267 tarinfo.uid = statres.st_uid
1268 tarinfo.gid = statres.st_gid
1269 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001270 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001271 else:
1272 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001273 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001274 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001275 tarinfo.linkname = linkname
1276 if pwd:
1277 try:
1278 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1279 except KeyError:
1280 pass
1281 if grp:
1282 try:
1283 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1284 except KeyError:
1285 pass
1286
1287 if type in (CHRTYPE, BLKTYPE):
1288 if hasattr(os, "major") and hasattr(os, "minor"):
1289 tarinfo.devmajor = os.major(statres.st_rdev)
1290 tarinfo.devminor = os.minor(statres.st_rdev)
1291 return tarinfo
1292
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    # Every member produces one output line. The trailing commas keep
    # the print statements on the same line; the bare print at the end
    # of the loop body terminates it.
    for tarinfo in self:
        if verbose:
            print filemode(tarinfo.mode),
            # Owner and group: symbolic names if present, else the ids.
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Devices show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Modification time, converted with time.localtime().
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
1321
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files for which gettarinfo() returns
       None are skipped (a debug message is emitted).
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        # Close the source file even if writing the member fails.
        try:
            self.addfile(tarinfo, f)
        finally:
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1373
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       Note that `tarinfo' is modified in place (name, linkname and
       possibly prefix). With self.posix set, ValueError is raised for
       members that are too large or whose name or linkname is too long.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    # Oversized members are refused in POSIX mode; otherwise only a
    # debug message is emitted here.
    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        else:
            self._dbg(2, "tarfile: Created GNU tar largefile header")


    # A link name that does not fit is refused in POSIX mode. Otherwise
    # the full name is handed to _create_gnulong() first and the header
    # field keeps a truncated copy.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        else:
            self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
            tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last '/' within the first
            # LENGTH_PREFIX + 1 characters into prefix and name.
            prefix = tarinfo.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            name = tarinfo.name[len(prefix):]
            # Drop the separating '/' from the prefix.
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header block of the member itself.
    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Pad the data with NULs up to a full block.
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001438
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while the directory attributes are set is
       re-raised if self.errorlevel > 1 and reported via _dbg() otherwise.
    """
    directories = []

    if members is None:
        members = self

    # 0777, spelled with stat constants so that this block parses
    # regardless of the octal literal syntax of the interpreter.
    safe_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name), safe_mode)
            except EnvironmentError:
                # Best effort, e.g. the directory exists already.
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable: rebinding `path' here would make every
        # following directory path be joined onto the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1479
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001480 def extract(self, member, path=""):
1481 """Extract a member from the archive to the current working directory,
1482 using its full name. Its file information is extracted as accurately
1483 as possible. `member' may be a filename or a TarInfo object. You can
1484 specify a different directory using `path'.
1485 """
1486 self._check("r")
1487
1488 if isinstance(member, TarInfo):
1489 tarinfo = member
1490 else:
1491 tarinfo = self.getmember(member)
1492
Neal Norwitza4f651a2004-07-20 22:07:44 +00001493 # Prepare the link target for makelink().
1494 if tarinfo.islnk():
1495 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1496
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001497 try:
1498 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1499 except EnvironmentError, e:
1500 if self.errorlevel > 0:
1501 raise
1502 else:
1503 if e.filename is None:
1504 self._dbg(1, "tarfile: %s" % e.strerror)
1505 else:
1506 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1507 except ExtractError, e:
1508 if self.errorlevel > 1:
1509 raise
1510 else:
1511 self._dbg(1, "tarfile: %s" % e)
1512
def extractfile(self, member):
    """Return a file-like object for `member', which may be a member
       name or a TarInfo object. Regular files and members of unknown
       type yield an object for their own data, (sym)links yield the
       object of their link target. For all other members None is
       returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files carry data; members of unknown type are treated
    # like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        if isinstance(self.fileobj, _Stream):
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.).
    return None
1551
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.

       Missing parent directories are created first. The member is then
       created by the make*() method that matches its type, and finally
       owner, permissions and modification time are applied.
    """
    # Fetch the TarInfo object for the given name
    # and build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # Build a synthetic directory member that inherits owner and
        # mtime from the member being extracted, and extract it
        # recursively (which in turn creates its own parents).
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        ti.mode = 0777
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        try:
            self._extract_member(ti, ti.name)
        except:
            # NOTE(review): best effort, but the bare except also hides
            # unrelated errors; a real failure presumably resurfaces
            # when the member itself is created below.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch on the member type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    # chmod() is skipped for symbolic links.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1604
1605 #--------------------------------------------------------------------------
1606 # Below are the different file methods. They are called via
1607 # _extract_member() when extract() is called. They can be replaced in a
1608 # subclass to implement other functionality.
1609
1610 def makedir(self, tarinfo, targetpath):
1611 """Make a directory called targetpath.
1612 """
1613 try:
1614 os.mkdir(targetpath)
1615 except EnvironmentError, e:
1616 if e.errno != errno.EEXIST:
1617 raise
1618
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member `tarinfo'.

       Both the member's file object and the target file are closed
       even if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1627
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath by treating it
       like a regular file, and report that via _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
1635
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath. ExtractError is raised if the os
       module provides no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001643
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at targetpath.
       ExtractError is raised if the os module lacks mknod() or
       makedev().
    """
    supported = hasattr(os, "mknod") and hasattr(os, "makedev")
    if not supported:
        raise ExtractError("special devices not supported by system")

    # Anything that is not a block device is created as a
    # character device.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    mode = tarinfo.mode | devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
1658
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       IOError is raised if neither the link nor a copy can be created.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Presumably reached when os.symlink/os.link do not exist on
        # this platform (or _link_target was never set by extract()).
        if tarinfo.issym():
            # Build the target's path from the directory of the link
            # member and the link name.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        # First choice: extract the archive member the link refers to
        # under the link's name.
        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            # Second choice: copy a file of that name from the
            # filesystem.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001685
def chown(self, tarinfo, targetpath):
    """Apply the owner stored in tarinfo to targetpath. Nothing is done
       unless pwd is available and the effective user id is 0.
       ExtractError is raised if changing the owner fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to do so.
        return

    # Group: by name first, then by id, else the current group.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # User: by name first, then by id, else the current user.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001713
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Nothing is done if the os module has no chmod(). ExtractError
       is raised if changing the mode fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001722
def utime(self, tarinfo, targetpath):
    """Apply the modification time stored in tarinfo to targetpath
       (it is used as access time as well). ExtractError is raised if
       setting the time fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return

    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001736
1737 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised if the very first block of the archive cannot
       be interpreted (unless self.ignore_zeros is set).
    """
    self._check("ra")
    # A member that was read ahead by __init__() is handed out first.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # An unusable block at the very start is an error;
                # anywhere else it is taken as the end of the archive.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
                                         tarinfo.name))

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1797
1798 #--------------------------------------------------------------------------
Thomas Wouters477c8d52006-05-27 19:21:47 +00001799 # The following are methods that are called depending on the type of a
1800 # member. The entry point is proc_member() which is called with a TarInfo
1801 # object created from the header block from the current offset. The
1802 # proc_member() method can be overridden in a subclass to add custom
1803 # proc_*() methods. A proc_*() method MUST implement the following
1804 # operations:
1805 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1806 # if there is data that follows.
1807 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001808 # begin.
Thomas Wouters477c8d52006-05-27 19:21:47 +00001809 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that handles its type
       and return that method's result.

       GNU longname/longlink headers go to proc_gnulong(), GNU sparse
       headers go to proc_sparse(), everything else goes to
       proc_builtin().
    """
    kind = tarinfo.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    if kind == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    # Builtin types and unknown types share one handler.
    return self.proc_builtin(tarinfo)
1820
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type that
       is treated like a regular file.

       Records where the member's data starts and, if the member
       carries data blocks, moves self.offset past them. Returns
       tarinfo.
    """
    tarinfo.offset_data = self.offset
    carries_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if carries_data:
        # Jump over the data blocks that follow the header.
        self.offset += self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001830
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member.

       Reads the data blocks that hold the long name, then reads and
       processes the header that follows them. The TarInfo object
       produced for that following header is patched with the long
       name (or long link name) and returned in place of tarinfo.
    """
    # Gather the data blocks holding the name, one block at a time.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longname = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information. Its offset is set to that of
    # the longname header itself.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longname.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longname.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001859
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a _ringbuffer of _data and _hole sections from the sparse
       structs found in the header buffer (tarinfo.buf) and in any
       extension blocks read from self.fileobj, and stores it in
       tarinfo.sparse. tarinfo.offset_data is set to the start of the
       stored data, self.offset is moved past the stored data blocks,
       and tarinfo.size is replaced by the original size read from the
       header. Returns tarinfo.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    # Byte position of the first sparse struct inside the header block.
    pos = 386
    # lastpos: end of the last section seen, in the original file.
    # realpos: running total of the data bytes seen so far.
    lastpos = 0L
    realpos = 0L
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        try:
            # Each struct is 24 bytes: 12 for the offset, 12 for the size.
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            # Fields that cannot be parsed end the list of structs.
            break
        if offset > lastpos:
            # A gap before this data section is recorded as a hole.
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    # The byte after the 4 structs (386 + 4 * 24 == 482) is the
    # isextended flag; the following 12 bytes hold the original size.
    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        # NOTE(review): a short read here would make buf[504] below raise
        # IndexError, which next() does not catch (it only handles
        # ValueError) -- confirm whether truncated archives matter.
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extension block holds up to 21 structs of 24 bytes each.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        # The flag byte follows the 21 structs (21 * 24 == 504).
        isextended = ord(buf[504])

    if lastpos < origsize:
        # The rest of the file up to its original size is a hole.
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    # tarinfo.size must still be the header's size field here, because it
    # is used to skip the stored data; only afterwards is it replaced.
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    # Clear the prefix field so that it is not used
    # as a pathname in next().
    tarinfo.prefix = ""

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001920
1921 #--------------------------------------------------------------------------
1922 # Little helper methods:
1923
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    # Ceiling division expressed with floor division.
    return (count + BLOCKSIZE - 1) // BLOCKSIZE * BLOCKSIZE
1932
def _getmember(self, name, tarinfo=None):
    """Search the archive members backwards for one called name.

       If tarinfo is given, only the members that precede it are
       searched. Returns the matching member, or None if there is
       none.
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001948
def _load(self):
    """Walk through the whole archive by calling next() until it
       returns None, then mark the TarFile as fully loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1958
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed, or if mode is given
       and the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        # No mode restriction requested.
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001967
def __iter__(self):
    """Return an iterator over the archive's members.

       Once all members are loaded the member list is iterated
       directly; otherwise a TarIter is returned.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1975
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

       The member is an extended tar header named "././@LongLink"
       whose size is the length of the NUL-terminated name, followed
       by data blocks holding that name, padded with NULs up to a
       full block. self.offset is advanced for everything written.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE
    # write name blocks
    self.fileobj.write(payload)
    full_blocks, tail = divmod(header.size, BLOCKSIZE)
    if tail > 0:
        # Fill the last, partially used block with NULs.
        self.fileobj.write(NUL * (BLOCKSIZE - tail))
        full_blocks += 1
    self.offset += full_blocks * BLOCKSIZE
2000
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level is above self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2006# class TarFile
2007
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile, starting at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, or raise StopIteration at the end.

           While the TarFile is not fully loaded, members come from its
           next() method; when that is exhausted the TarFile is marked
           as _loaded. Once it is loaded, members are taken from its
           member list by index.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Keeping an
        # index allows the iteration to continue from the member list.
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2043
2044# Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a span of size bytes that
       starts at offset.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # True when offset lies in the half-open range
        # [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset and offset < start + self.size
2053
class _data(_section):
    """A section of a sparse file that holds actual data.

       realpos is stored alongside the section's offset and size;
       proc_sparse() passes the running total of the sizes of the
       data sections that come before this one.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2059 self.realpos = realpos
2060
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section and exists so that holes can be told apart from
       _data sections.
    """
2065
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts at the position of the previous hit rather
       than at the beginning, and wraps around at the end of the
       list.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains offset, searching
           from the last hit onwards and wrapping around once.
           Return None if no item contains offset.
        """
        # An empty buffer cannot contain any offset. Without this
        # guard the self[idx] lookup below would raise IndexError
        # instead of reporting a miss.
        if not self:
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                # Wrap around to the start of the list.
                idx = 0
            if idx == self.idx:
                # End of File: every item was checked once.
                return None
        # Remember the hit as the starting point of the next search.
        self.idx = idx
        return item
2086
2087#---------------------------------------------
2088# zipfile compatible TarFile class
2089#---------------------------------------------
# Compression constants accepted by TarFileCompat; the values are chosen
# to match the corresponding zipfile constants named on each line.
TAR_PLAIN = 0           # zipfile.ZIP_STORED: archive opened via TarFile.taropen
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED: archive opened via TarFile.gzopen
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with the TarFile opener selected by compression
           (TAR_PLAIN or TAR_GZIPPED). In read mode every member
           additionally gets the ZipInfo-style attributes filename,
           file_size and date_time.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist().
        """
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive's table of contents via TarFile.list().
        """
        self.tarfile.list()

    def testzip(self):
        """Present for ZipFile compatibility; always returns None.
        """
        return None

    def getinfo(self, name):
        """Return the member called name.
        """
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive, optionally under
           the name arcname. compress_type is accepted for ZipFile
           compatibility and is not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, using zinfo as its
           header. The tar attributes name, size and mtime are set
           on zinfo from its filename, file_size and date_time.
        """
        import calendar
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile.
        """
        self.tarfile.close()
2136#class TarFileCompat
2137
2138#--------------------
2139# exported functions
2140#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened with this module's open() (bound to
       TarFile.open at the end of the module) and closed again; a
       TarError raised while doing so means False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2151
# Module-level open(): an alias for TarFile.open. From here on the name
# `open` in this module's namespace refers to it instead of the builtin,
# which is what is_tarfile() relies on when it calls open(name).
open = TarFile.open