blob: 061d0f55b9521f081e1233ff1aa89905ab1679f8 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
Thomas Wouters477c8d52006-05-27 19:21:47 +000036version = "0.8.0"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE         # length of records
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1        # maximum size of a file
                                    # (11 octal digits, all sevens)

# Values of a header's one-character type field.
REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
# The digit strings below are octal; int(..., 8) turns each one into
# the corresponding bit mask.
S_IFLNK = int("0120000", 8)         # symbolic link
S_IFREG = int("0100000", 8)         # regular file
S_IFBLK = int("0060000", 8)         # block device
S_IFDIR = int("0040000", 8)         # directory
S_IFCHR = int("0020000", 8)         # character device
S_IFIFO = int("0010000", 8)         # fifo

TSUID = int("04000", 8)             # set UID on execution
TSGID = int("02000", 8)             # set GID on execution
TSVTX = int("01000", 8)             # reserved

TUREAD = int("0400", 8)             # read by owner
TUWRITE = int("0200", 8)            # write by owner
TUEXEC = int("0100", 8)             # execute/search by owner
TGREAD = int("0040", 8)             # read by group
TGWRITE = int("0020", 8)            # write by group
TGEXEC = int("0010", 8)             # execute/search by group
TOREAD = int("0004", 8)             # read by other
TOWRITE = int("0002", 8)            # write by other
TOEXEC = int("0001", 8)             # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000135
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    s is cut down to at most length - 1 characters, filled up with
    NUL characters and always closed by one final NUL, so for any
    length >= 1 the result is exactly length characters long.
    """
    text = s[:length - 1]
    filler = NUL * (length - len(s) - 1)    # empty if s had to be cut
    return text + filler + NUL
140
def nti(s):
    """Convert a number field to a python number.

    A field is either a string of octal digits (trailing NULs are
    ignored, an empty field counts as 0) or, if it starts with the
    byte 0200, a big-endian base-256 number in the remaining bytes.
    See itn() for the writing side.
    """
    if s[0] == "\200":
        # GNU encoding: fold the bytes after the marker, most
        # significant byte first.
        number = 0
        for char in s[1:]:
            number = (number << 8) + ord(char)
        return number
    return int(s.rstrip(NUL) or "0", 8)
154
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field.

    The result is digits characters long. Raises ValueError if posix
    is true and n cannot be stored as digits - 1 octal digits.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    width = digits - 1
    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Peel off the low byte width times, then flip the list so that
    # the most significant byte comes first.
    octets = []
    for _ in range(width):
        octets.append(chr(n & 0xFF))
        n >>= 8
    octets.reverse()
    return "\200" + "".join(octets)
181
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum). Only the
       first 512 bytes of buf are looked at.
    """
    header = buf[:512]
    # "8x" skips the eight bytes of the chksum field (offsets 148-155);
    # the constant 256 stands in for them as eight spaces (8 * 32).
    as_unsigned = struct.unpack("148B8x356B", header)
    as_signed = struct.unpack("148b8x356b", header)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000194
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    whole_blocks, tail = divmod(length, BUFSIZE)

    # First the full-sized blocks ...
    done = 0
    while done < whole_blocks:
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
        done += 1

    # ... then whatever is left over.
    if tail != 0:
        chunk = src.read(tail)
        if len(chunk) < tail:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
219
# One inner tuple per output character of filemode(). Each inner tuple
# holds (bit mask, character) pairs which are tried from top to bottom.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # The first pair whose mask is completely set in mode wins,
        # "-" is used if none of them matches.
        symbol = "-"
        for mask, char in candidates:
            if mode & mask == mask:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000261
def normpath(path):
    """Normalize path with os.path.normpath() and make sure that "/"
       is used as the separator, whatever os.sep is on this platform.
    """
    path = os.path.normpath(path)
    if os.sep != "/":
        path = path.replace(os.sep, "/")
    return path
266
class TarError(Exception):
    """Base class of all exceptions defined in this module."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
282
283#---------------------------
284# internal stream interface
285#---------------------------
286class _LowLevelFile:
287 """Low-level file object. Supports reading and writing.
288 It is used instead of a regular file object for streaming
289 access.
290 """
291
292 def __init__(self, name, mode):
293 mode = {
294 "r": os.O_RDONLY,
295 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
296 }[mode]
297 if hasattr(os, "O_BINARY"):
298 mode |= os.O_BINARY
299 self.fd = os.open(name, mode)
300
301 def close(self):
302 os.close(self.fd)
303
304 def read(self, size):
305 return os.read(self.fd, size)
306
307 def write(self, s):
308 os.write(self.fd, s)
309
310class _Stream:
311 """Class that serves as an adapter between TarFile and
312 a stream-like object. The stream-like object only
313 needs to have a read() or write() method and is accessed
314 blockwise. Use of gzip or bzip2 compression is possible.
315 A stream-like object could be for example: sys.stdin,
316 sys.stdout, a socket, a tape device etc.
317
318 _Stream is intended to be used only internally.
319 """
320
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000321 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000322 """Construct a _Stream object.
323 """
324 self._extfileobj = True
325 if fileobj is None:
326 fileobj = _LowLevelFile(name, mode)
327 self._extfileobj = False
328
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000329 if comptype == '*':
330 # Enable transparent compression detection for the
331 # stream interface
332 fileobj = _StreamProxy(fileobj)
333 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000334
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000335 self.name = name or ""
336 self.mode = mode
337 self.comptype = comptype
338 self.fileobj = fileobj
339 self.bufsize = bufsize
340 self.buf = ""
341 self.pos = 0L
342 self.closed = False
343
344 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000345 try:
346 import zlib
347 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000348 raise CompressionError("zlib module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000349 self.zlib = zlib
350 self.crc = zlib.crc32("")
351 if mode == "r":
352 self._init_read_gz()
353 else:
354 self._init_write_gz()
355
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000356 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000357 try:
358 import bz2
359 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000360 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000361 if mode == "r":
362 self.dbuf = ""
363 self.cmp = bz2.BZ2Decompressor()
364 else:
365 self.cmp = bz2.BZ2Compressor()
366
367 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000368 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000369 self.close()
370
371 def _init_write_gz(self):
372 """Initialize for writing with gzip compression.
373 """
374 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
375 -self.zlib.MAX_WBITS,
376 self.zlib.DEF_MEM_LEVEL,
377 0)
378 timestamp = struct.pack("<L", long(time.time()))
379 self.__write("\037\213\010\010%s\002\377" % timestamp)
380 if self.name.endswith(".gz"):
381 self.name = self.name[:-3]
382 self.__write(self.name + NUL)
383
384 def write(self, s):
385 """Write string s to the stream.
386 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000387 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000388 self.crc = self.zlib.crc32(s, self.crc)
389 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000390 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000391 s = self.cmp.compress(s)
392 self.__write(s)
393
394 def __write(self, s):
395 """Write string s to the stream if a whole new block
396 is ready to be written.
397 """
398 self.buf += s
399 while len(self.buf) > self.bufsize:
400 self.fileobj.write(self.buf[:self.bufsize])
401 self.buf = self.buf[self.bufsize:]
402
403 def close(self):
404 """Close the _Stream object. No operation should be
405 done on it afterwards.
406 """
407 if self.closed:
408 return
409
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000411 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000412
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000413 if self.mode == "w" and self.buf:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000414 blocks, remainder = divmod(len(self.buf), self.bufsize)
415 if remainder > 0:
416 self.buf += NUL * (self.bufsize - remainder)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000417 self.fileobj.write(self.buf)
418 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000419 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000420 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000421 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000422
423 if not self._extfileobj:
424 self.fileobj.close()
425
426 self.closed = True
427
428 def _init_read_gz(self):
429 """Initialize for reading a gzip compressed fileobj.
430 """
431 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
432 self.dbuf = ""
433
434 # taken from gzip.GzipFile with some alterations
435 if self.__read(2) != "\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000436 raise ReadError("not a gzip file")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000437 if self.__read(1) != "\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000438 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000439
440 flag = ord(self.__read(1))
441 self.__read(6)
442
443 if flag & 4:
444 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
445 self.read(xlen)
446 if flag & 8:
447 while True:
448 s = self.__read(1)
449 if not s or s == NUL:
450 break
451 if flag & 16:
452 while True:
453 s = self.__read(1)
454 if not s or s == NUL:
455 break
456 if flag & 2:
457 self.__read(2)
458
459 def tell(self):
460 """Return the stream's file pointer position.
461 """
462 return self.pos
463
464 def seek(self, pos=0):
465 """Set the stream's file pointer to pos. Negative seeking
466 is forbidden.
467 """
468 if pos - self.pos >= 0:
469 blocks, remainder = divmod(pos - self.pos, self.bufsize)
470 for i in xrange(blocks):
471 self.read(self.bufsize)
472 self.read(remainder)
473 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000474 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000475 return self.pos
476
477 def read(self, size=None):
478 """Return the next size number of bytes from the stream.
479 If size is not defined, return all bytes of the stream
480 up to EOF.
481 """
482 if size is None:
483 t = []
484 while True:
485 buf = self._read(self.bufsize)
486 if not buf:
487 break
488 t.append(buf)
489 buf = "".join(t)
490 else:
491 buf = self._read(size)
492 self.pos += len(buf)
493 return buf
494
495 def _read(self, size):
496 """Return size bytes from the stream.
497 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000498 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000499 return self.__read(size)
500
501 c = len(self.dbuf)
502 t = [self.dbuf]
503 while c < size:
504 buf = self.__read(self.bufsize)
505 if not buf:
506 break
507 buf = self.cmp.decompress(buf)
508 t.append(buf)
509 c += len(buf)
510 t = "".join(t)
511 self.dbuf = t[size:]
512 return t[:size]
513
514 def __read(self, size):
515 """Return size bytes from stream. If internal buffer is empty,
516 read another block from the stream.
517 """
518 c = len(self.buf)
519 t = [self.buf]
520 while c < size:
521 buf = self.fileobj.read(self.bufsize)
522 if not buf:
523 break
524 t.append(buf)
525 c += len(buf)
526 t = "".join(t)
527 self.buf = t[size:]
528 return t[:size]
529# class _Stream
530
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000531class _StreamProxy(object):
532 """Small proxy class that enables transparent compression
533 detection for the Stream interface (mode 'r|*').
534 """
535
536 def __init__(self, fileobj):
537 self.fileobj = fileobj
538 self.buf = self.fileobj.read(BLOCKSIZE)
539
540 def read(self, size):
541 self.read = self.fileobj.read
542 return self.buf
543
544 def getcomptype(self):
545 if self.buf.startswith("\037\213\010"):
546 return "gz"
547 if self.buf.startswith("BZh91"):
548 return "bz2"
549 return "tar"
550
551 def close(self):
552 self.fileobj.close()
553# class StreamProxy
554
Thomas Wouters477c8d52006-05-27 19:21:47 +0000555class _BZ2Proxy(object):
556 """Small proxy class that enables external file object
557 support for "r:bz2" and "w:bz2" modes. This is actually
558 a workaround for a limitation in bz2 module's BZ2File
559 class which (unlike gzip.GzipFile) has no support for
560 a file object argument.
561 """
562
563 blocksize = 16 * 1024
564
565 def __init__(self, fileobj, mode):
566 self.fileobj = fileobj
567 self.mode = mode
568 self.init()
569
570 def init(self):
571 import bz2
572 self.pos = 0
573 if self.mode == "r":
574 self.bz2obj = bz2.BZ2Decompressor()
575 self.fileobj.seek(0)
576 self.buf = ""
577 else:
578 self.bz2obj = bz2.BZ2Compressor()
579
580 def read(self, size):
581 b = [self.buf]
582 x = len(self.buf)
583 while x < size:
584 try:
585 raw = self.fileobj.read(self.blocksize)
586 data = self.bz2obj.decompress(raw)
587 b.append(data)
588 except EOFError:
589 break
590 x += len(data)
591 self.buf = "".join(b)
592
593 buf = self.buf[:size]
594 self.buf = self.buf[size:]
595 self.pos += len(buf)
596 return buf
597
598 def seek(self, pos):
599 if pos < self.pos:
600 self.init()
601 self.read(pos - self.pos)
602
603 def tell(self):
604 return self.pos
605
606 def write(self, data):
607 self.pos += len(data)
608 raw = self.bz2obj.compress(data)
609 self.fileobj.write(raw)
610
611 def close(self):
612 if self.mode == "w":
613 raw = self.bz2obj.flush()
614 self.fileobj.write(raw)
615 self.fileobj.close()
616# class _BZ2Proxy
617
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000618#------------------------
619# Extraction file object
620#------------------------
621class ExFileObject(object):
622 """File-like object for reading an archive member.
623 Is returned by TarFile.extractfile(). Support for
624 sparse files included.
625 """
626
627 def __init__(self, tarfile, tarinfo):
628 self.fileobj = tarfile.fileobj
629 self.name = tarinfo.name
630 self.mode = "r"
631 self.closed = False
632 self.offset = tarinfo.offset_data
633 self.size = tarinfo.size
634 self.pos = 0L
635 self.linebuffer = ""
636 if tarinfo.issparse():
637 self.sparse = tarinfo.sparse
638 self.read = self._readsparse
639 else:
640 self.read = self._readnormal
641
642 def __read(self, size):
643 """Overloadable read method.
644 """
645 return self.fileobj.read(size)
646
647 def readline(self, size=-1):
648 """Read a line with approx. size. If size is negative,
649 read a whole line. readline() and read() must not
650 be mixed up (!).
651 """
652 if size < 0:
653 size = sys.maxint
654
655 nl = self.linebuffer.find("\n")
656 if nl >= 0:
657 nl = min(nl, size)
658 else:
659 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000660 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000661 buf = self.read(min(size, 100))
662 if not buf:
663 break
664 self.linebuffer += buf
665 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000666 nl = self.linebuffer.find("\n")
667 if nl == -1:
668 s = self.linebuffer
669 self.linebuffer = ""
670 return s
671 buf = self.linebuffer[:nl]
672 self.linebuffer = self.linebuffer[nl + 1:]
673 while buf[-1:] == "\r":
674 buf = buf[:-1]
675 return buf + "\n"
676
677 def readlines(self):
678 """Return a list with all (following) lines.
679 """
680 result = []
681 while True:
682 line = self.readline()
683 if not line: break
684 result.append(line)
685 return result
686
687 def _readnormal(self, size=None):
688 """Read operation for regular files.
689 """
690 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000691 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000692 self.fileobj.seek(self.offset + self.pos)
693 bytesleft = self.size - self.pos
694 if size is None:
695 bytestoread = bytesleft
696 else:
697 bytestoread = min(size, bytesleft)
698 self.pos += bytestoread
699 return self.__read(bytestoread)
700
701 def _readsparse(self, size=None):
702 """Read operation for sparse files.
703 """
704 if self.closed:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000705 raise ValueError("file is closed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000706
707 if size is None:
708 size = self.size - self.pos
709
710 data = []
711 while size > 0:
712 buf = self._readsparsesection(size)
713 if not buf:
714 break
715 size -= len(buf)
716 data.append(buf)
717 return "".join(data)
718
719 def _readsparsesection(self, size):
720 """Read a single section of a sparse file.
721 """
722 section = self.sparse.find(self.pos)
723
724 if section is None:
725 return ""
726
727 toread = min(size, section.offset + section.size - self.pos)
728 if isinstance(section, _data):
729 realpos = section.realpos + self.pos - section.offset
730 self.pos += toread
731 self.fileobj.seek(self.offset + realpos)
732 return self.__read(toread)
733 else:
734 self.pos += toread
735 return NUL * toread
736
737 def tell(self):
738 """Return the current file position.
739 """
740 return self.pos
741
742 def seek(self, pos, whence=0):
743 """Seek to a position in the file.
744 """
745 self.linebuffer = ""
746 if whence == 0:
747 self.pos = min(max(pos, 0), self.size)
748 if whence == 1:
749 if pos < 0:
750 self.pos = max(self.pos + pos, 0)
751 else:
752 self.pos = min(self.pos + pos, self.size)
753 if whence == 2:
754 self.pos = max(min(self.size + pos, self.size), 0)
755
756 def close(self):
757 """Close the file object.
758 """
759 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000760
761 def __iter__(self):
762 """Get an iterator over the file object.
763 """
764 if self.closed:
765 raise ValueError("I/O operation on closed file")
766 return self
767
768 def next(self):
769 """Get the next item from the file iterator.
770 """
771 result = self.readline()
772 if not result:
773 raise StopIteration
774 return result
Tim Peterseba28be2005-03-28 01:08:02 +0000775
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000776#class ExFileObject
777
778#------------------
779# Exported Classes
780#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name        # member name (dirnames must end with '/')
        self.mode = 0x1B6       # file permissions (0666 in octal)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number
        self.prefix = ""        # prefix to filename or information
                                # about sparse files

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises ValueError if buf does not have the size of a block,
           consists of NULs only or if its checksum does not match.
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = buf[0:100].rstrip(NUL)
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = buf[157:257].rstrip(NUL)
        # buf[257:265] holds magic and version, they are not evaluated
        tarinfo.uname = buf[265:297].rstrip(NUL)
        tarinfo.gname = buf[297:329].rstrip(NUL)
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        tarinfo.prefix = buf[345:500]   # kept as it is, NULs included

        # The stored value may be the unsigned or the signed sum.
        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header block as a 512 byte string.

           If posix is true, numbers that do not fit into their field
           as octal digits raise ValueError (see itn()).
        """
        parts = [
            stn(self.name, 100),
            itn(self.mode & 0xFFF, 8, posix),   # 0xFFF == 07777 octal
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            # The checksum field is 8 bytes wide (offsets 148-155) and
            # counts as spaces while the checksum is calculated. A
            # shorter placeholder would move every following field
            # away from the offsets frombuf() reads them at.
            " " * 8,
            self.type,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(self.prefix, 155)
        ]

        # "%ds" pads the 500 bytes of fields with NULs to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf)[0]
        # Six octal digits and a NUL; the eighth byte stays a space.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        self.buf = buf
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
895
896class TarFile(object):
897 """The TarFile Class provides an interface to tar archives.
898 """
899
900 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
901
902 dereference = False # If true, add content of linked file to the
903 # tar file, else the link.
904
905 ignore_zeros = False # If true, skips empty or invalid blocks and
906 # continues processing.
907
908 errorlevel = 0 # If 0, fatal errors only appear in debug
909 # messages (if debug >= 0). If > 0, errors
910 # are passed to the caller as exceptions.
911
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000912 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000913 # archives (no GNU extensions!)
914
915 fileobject = ExFileObject
916
917 def __init__(self, name=None, mode="r", fileobj=None):
918 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
919 read from an existing archive, 'a' to append data to an existing
920 file or 'w' to create a new file overwriting an existing one. `mode'
921 defaults to 'r'.
922 If `fileobj' is given, it is used for reading or writing data. If it
923 can be determined, `mode' is overridden by `fileobj's mode.
924 `fileobj' is not closed, when TarFile is closed.
925 """
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000926 self.name = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000927
928 if len(mode) > 1 or mode not in "raw":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000929 raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000930 self._mode = mode
931 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
932
933 if not fileobj:
934 fileobj = file(self.name, self.mode)
935 self._extfileobj = False
936 else:
937 if self.name is None and hasattr(fileobj, "name"):
Martin v. Löwisfaffa152005-08-24 06:43:09 +0000938 self.name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000939 if hasattr(fileobj, "mode"):
940 self.mode = fileobj.mode
941 self._extfileobj = True
942 self.fileobj = fileobj
943
944 # Init datastructures
Thomas Wouters477c8d52006-05-27 19:21:47 +0000945 self.closed = False
946 self.members = [] # list of members as TarInfo objects
947 self._loaded = False # flag if all members have been read
948 self.offset = 0L # current position in the archive file
949 self.inodes = {} # dictionary caching the inodes of
950 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000951
952 if self._mode == "r":
953 self.firstmember = None
954 self.firstmember = self.next()
955
956 if self._mode == "a":
957 # Move to the end of the archive,
958 # before the first empty block.
959 self.firstmember = None
960 while True:
961 try:
962 tarinfo = self.next()
963 except ReadError:
964 self.fileobj.seek(0)
965 break
966 if tarinfo is None:
967 self.fileobj.seek(- BLOCKSIZE, 1)
968 break
969
970 if self._mode in "aw":
971 self._loaded = True
972
973 #--------------------------------------------------------------------------
974 # Below are the classmethods which act as alternate constructors to the
975 # TarFile class. The open() method is the only one that is needed for
976 # public use; it is the "super"-constructor and is able to select an
977 # adequate "sub"-constructor for a particular compression using the mapping
978 # from OPEN_METH.
979 #
980 # This concept allows one to subclass TarFile without losing the comfort of
981 # the super-constructor. A sub-constructor is registered and made available
982 # by adding it to the mapping in OPEN_METH.
983
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered *open() method
       is able to read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # A failed attempt may have consumed data from a caller-supplied
        # file object, so remember where it started and rewind it before
        # the next candidate is tried.
        if fileobj is not None:
            saved_pos = fileobj.tell()
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        func = getattr(cls, cls.OPEN_METH[comptype])
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against the exact modes; a substring test would let
        # "rw" through and build a _Stream with a bogus mode.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    # Exact match on purpose: `mode in "aw"' is also true for "".
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001051
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. Returns cls(name, mode, fileobj).
    """
    # Exact comparison: a substring test against "raw" also accepts "".
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1059
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError for
       an invalid mode, CompressionError if the gzip module is not
       usable and ReadError if the data is not a gzip file.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the contained tar archive from the file name
    # ("x.tgz" -> "x.tar", "x.tar.gz" -> "x.tar"). Without a name there
    # is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    # Remember whether the file was opened here, so that it can be
    # closed again if opening the archive fails.
    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        try:
            t = cls.taropen(tarname, mode,
                            gzip.GzipFile(name, mode, compresslevel, fileobj))
        except IOError:
            raise ReadError("not a gzip file")
    except:
        # Cleanup only, the exception is re-raised unchanged.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1096
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `name' may be None if `fileobj' is given. Raises ValueError for
       an invalid mode, CompressionError if the bz2 module cannot be
       imported and ReadError if the data is not a bzip2 file.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the contained tar archive from the file name
    # ("x.tbz2" -> "x.tar", "x.tar.bz2" -> "x.tar"). Without a name
    # there is nothing to derive.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    # Only a BZ2File created here is closed again on failure; a file
    # object passed in by the caller stays under the caller's control.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        try:
            t = cls.taropen(tarname, mode, fileobj)
        except IOError:
            raise ReadError("not a bzip2 file")
    except:
        # Cleanup only, the exception is re-raised unchanged.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1129
# All *open() methods are registered here. The keys are the compression
# types that open() accepts in its mode string, the values are the names
# of the classmethods that open() looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1136
1137 #--------------------------------------------------------------------------
1138 # The public methods which TarFile provides:
1139
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros to
       a multiple of RECORDSIZE.

       Closing an already closed TarFile does nothing. A file object
       that was opened by the TarFile itself is closed even if writing
       the trailer fails.
    """
    if self.closed:
        return

    try:
        if self._mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Release the file object even when the trailer could not be
        # written, otherwise a failed close() leaks the open file.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1159
def getmember(self, name):
    """Look up the archive member called `name' and return its TarInfo
       object. KeyError is raised if there is no such member. If a
       member occurs more than once in the archive, its last occurrence
       is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001170
def getmembers(self):
    """Return all members of the archive as a list of TarInfo objects,
       in the order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The member list is incomplete until the whole
    # archive has been scanned once.
    self._load()
    return self.members
1180
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001186
1187 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1188 """Create a TarInfo object for either the file `name' or the file
1189 object `fileobj' (using os.fstat on its file descriptor). You can
1190 modify some of the TarInfo's attributes before you add it using
1191 addfile(). If given, `arcname' specifies an alternative name for the
1192 file in the archive.
1193 """
1194 self._check("aw")
1195
1196 # When fileobj is given, replace name by
1197 # fileobj's real name.
1198 if fileobj is not None:
1199 name = fileobj.name
1200
1201 # Building the name of the member in the archive.
1202 # Backward slashes are converted to forward slashes,
1203 # Absolute paths are turned to relative paths.
1204 if arcname is None:
1205 arcname = name
1206 arcname = normpath(arcname)
1207 drv, arcname = os.path.splitdrive(arcname)
1208 while arcname[0:1] == "/":
1209 arcname = arcname[1:]
1210
1211 # Now, fill the TarInfo object with
1212 # information specific for the file.
1213 tarinfo = TarInfo()
1214
1215 # Use os.stat or os.lstat, depending on platform
1216 # and if symlinks shall be resolved.
1217 if fileobj is None:
1218 if hasattr(os, "lstat") and not self.dereference:
1219 statres = os.lstat(name)
1220 else:
1221 statres = os.stat(name)
1222 else:
1223 statres = os.fstat(fileobj.fileno())
1224 linkname = ""
1225
1226 stmd = statres.st_mode
1227 if stat.S_ISREG(stmd):
1228 inode = (statres.st_ino, statres.st_dev)
Neal Norwitzb0e32e22005-10-20 04:50:13 +00001229 if not self.dereference and \
1230 statres.st_nlink > 1 and inode in self.inodes:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001231 # Is it a hardlink to an already
1232 # archived file?
1233 type = LNKTYPE
1234 linkname = self.inodes[inode]
1235 else:
1236 # The inode is added only if its valid.
1237 # For win32 it is always 0.
1238 type = REGTYPE
1239 if inode[0]:
1240 self.inodes[inode] = arcname
1241 elif stat.S_ISDIR(stmd):
1242 type = DIRTYPE
1243 if arcname[-1:] != "/":
1244 arcname += "/"
1245 elif stat.S_ISFIFO(stmd):
1246 type = FIFOTYPE
1247 elif stat.S_ISLNK(stmd):
1248 type = SYMTYPE
1249 linkname = os.readlink(name)
1250 elif stat.S_ISCHR(stmd):
1251 type = CHRTYPE
1252 elif stat.S_ISBLK(stmd):
1253 type = BLKTYPE
1254 else:
1255 return None
1256
1257 # Fill the TarInfo object with all
1258 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001259 tarinfo.name = arcname
1260 tarinfo.mode = stmd
1261 tarinfo.uid = statres.st_uid
1262 tarinfo.gid = statres.st_gid
1263 if stat.S_ISREG(stmd):
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001264 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001265 else:
1266 tarinfo.size = 0L
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001267 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001268 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001269 tarinfo.linkname = linkname
1270 if pwd:
1271 try:
1272 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1273 except KeyError:
1274 pass
1275 if grp:
1276 try:
1277 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1278 except KeyError:
1279 pass
1280
1281 if type in (CHRTYPE, BLKTYPE):
1282 if hasattr(os, "major") and hasattr(os, "minor"):
1283 tarinfo.devmajor = os.major(statres.st_rdev)
1284 tarinfo.devminor = os.minor(statres.st_rdev)
1285 return tarinfo
1286
1287 def list(self, verbose=True):
1288 """Print a table of contents to sys.stdout. If `verbose' is False, only
1289 the names of the members are printed. If it is True, an `ls -l'-like
1290 output is produced.
1291 """
1292 self._check()
1293
1294 for tarinfo in self:
1295 if verbose:
1296 print filemode(tarinfo.mode),
1297 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1298 tarinfo.gname or tarinfo.gid),
1299 if tarinfo.ischr() or tarinfo.isblk():
1300 print "%10s" % ("%d,%d" \
1301 % (tarinfo.devmajor, tarinfo.devminor)),
1302 else:
1303 print "%10d" % tarinfo.size,
1304 print "%d-%02d-%02d %02d:%02d:%02d" \
1305 % time.localtime(tarinfo.mtime)[:6],
1306
1307 print tarinfo.name,
1308
1309 if verbose:
1310 if tarinfo.issym():
1311 print "->", tarinfo.linkname,
1312 if tarinfo.islnk():
1313 print "link to", tarinfo.linkname,
1314 print
1315
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of unsupported types are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if addfile() fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
1367
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       `tarinfo' is modified in place: its name and linkname are
       normalized and may be shortened or split into name and prefix.
       With self.posix set, ValueError is raised for a member that is
       too large or whose name or linkname is too long.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    # Oversized members are refused in posix mode and only reported
    # with a debug message otherwise.
    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        else:
            self._dbg(2, "tarfile: Created GNU tar largefile header")

    # An overlong linkname is an error in posix mode. Otherwise the full
    # linkname goes into a GNU LONGLINK member written ahead of this one
    # and the header field gets a truncated copy.
    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        else:
            self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
            tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at a "/" into prefix and name: take the
            # longest leading part that may fit the prefix field and
            # shorten it until it ends with a slash.
            prefix = tarinfo.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            name = tarinfo.name[len(prefix):]
            # Drop the separating slash from the prefix.
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            # Same scheme as for long linknames, using a LONGNAME member.
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # Write the header block and account for it.
    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Pad the data with NULs up to a full block.
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001432
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       An ExtractError raised while the directory attributes are set is
       re-raised if self.errorlevel > 1 and logged otherwise.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode (rwxrwxrwx), so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name),
                            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda d: d.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable here: rebinding `path' would make
        # every following directory build on the previous result.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1473
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001474 def extract(self, member, path=""):
1475 """Extract a member from the archive to the current working directory,
1476 using its full name. Its file information is extracted as accurately
1477 as possible. `member' may be a filename or a TarInfo object. You can
1478 specify a different directory using `path'.
1479 """
1480 self._check("r")
1481
1482 if isinstance(member, TarInfo):
1483 tarinfo = member
1484 else:
1485 tarinfo = self.getmember(member)
1486
Neal Norwitza4f651a2004-07-20 22:07:44 +00001487 # Prepare the link target for makelink().
1488 if tarinfo.islnk():
1489 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1490
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001491 try:
1492 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1493 except EnvironmentError, e:
1494 if self.errorlevel > 0:
1495 raise
1496 else:
1497 if e.filename is None:
1498 self._dbg(1, "tarfile: %s" % e.strerror)
1499 else:
1500 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1501 except ExtractError, e:
1502 if self.errorlevel > 1:
1503 raise
1504 else:
1505 self._dbg(1, "tarfile: %s" % e)
1506
def extractfile(self, member):
    """Return a file-like object for a member of the archive. `member'
       is either a member name or a TarInfo object. For a regular file
       (or a member of unknown type, which is treated like one) the
       object gives access to the member's data; for a link it is built
       from the link's target. None is returned for all other members.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type which are
    # treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.).
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A link target cannot be looked up in a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1545
1546 def _extract_member(self, tarinfo, targetpath):
1547 """Extract the TarInfo object tarinfo to a physical
1548 file called targetpath.
1549 """
1550 # Fetch the TarInfo object for the given name
1551 # and build the destination pathname, replacing
1552 # forward slashes to platform specific separators.
1553 if targetpath[-1:] == "/":
1554 targetpath = targetpath[:-1]
1555 targetpath = os.path.normpath(targetpath)
1556
1557 # Create all upper directories.
1558 upperdirs = os.path.dirname(targetpath)
1559 if upperdirs and not os.path.exists(upperdirs):
1560 ti = TarInfo()
1561 ti.name = upperdirs
1562 ti.type = DIRTYPE
1563 ti.mode = 0777
1564 ti.mtime = tarinfo.mtime
1565 ti.uid = tarinfo.uid
1566 ti.gid = tarinfo.gid
1567 ti.uname = tarinfo.uname
1568 ti.gname = tarinfo.gname
1569 try:
1570 self._extract_member(ti, ti.name)
1571 except:
1572 pass
1573
1574 if tarinfo.islnk() or tarinfo.issym():
1575 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1576 else:
1577 self._dbg(1, tarinfo.name)
1578
1579 if tarinfo.isreg():
1580 self.makefile(tarinfo, targetpath)
1581 elif tarinfo.isdir():
1582 self.makedir(tarinfo, targetpath)
1583 elif tarinfo.isfifo():
1584 self.makefifo(tarinfo, targetpath)
1585 elif tarinfo.ischr() or tarinfo.isblk():
1586 self.makedev(tarinfo, targetpath)
1587 elif tarinfo.islnk() or tarinfo.issym():
1588 self.makelink(tarinfo, targetpath)
1589 elif tarinfo.type not in SUPPORTED_TYPES:
1590 self.makeunknown(tarinfo, targetpath)
1591 else:
1592 self.makefile(tarinfo, targetpath)
1593
1594 self.chown(tarinfo, targetpath)
1595 if not tarinfo.issym():
1596 self.chmod(tarinfo, targetpath)
1597 self.utime(tarinfo, targetpath)
1598
1599 #--------------------------------------------------------------------------
1600 # Below are the different file methods. They are called via
1601 # _extract_member() when extract() is called. They can be replaced in a
1602 # subclass to implement other functionality.
1603
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. A directory that already
       exists is not an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only "already exists" is tolerated.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1612
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
       member described by tarinfo.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        # Both ends are closed even if opening the target
        # or copying the data fails.
        source.close()
1621
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that with a debug message.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
1629
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. ExtractError is raised on
       platforms that lack os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001637
def makedev(self, tarinfo, targetpath):
    """Create the character or block device described by tarinfo at
       targetpath. ExtractError is raised on platforms that lack
       os.mknod or os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Everything that is not a block device is created
    # as a character device.
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | devtype, device)
1652
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link described by tarinfo at
       targetpath. Where the platform cannot create links, a copy of
       the referenced file is made instead; IOError is raised if that
       fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # The target of a hardlink is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        pass
    else:
        return

    # No link could be made (AttributeError above): fall back to a copy.
    # The target of a symbolic link is relative to the link's directory.
    if tarinfo.issym():
        linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                         linkpath))

    try:
        # First choice: extract the referenced member a second time.
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        # Second choice: copy the file from the filesystem.
        linkpath = os.path.normpath(linkpath)
        try:
            shutil.copy2(linkpath, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001679
def chown(self, tarinfo, targetpath):
    """Change the owner of targetpath to the one recorded in tarinfo.
       Nothing is done unless the process runs as root. ExtractError
       is raised if the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: by name first, then by id and finally
    # fall back to the group of the current process.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # The same scheme for the user.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001707
def chmod(self, tarinfo, targetpath):
    """Apply the permissions recorded in tarinfo to targetpath.
       Nothing is done on platforms without os.chmod. ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001716
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to the mtime
       recorded in tarinfo. Nothing is done on platforms without
       os.utime and for directories on win32. ExtractError is raised
       if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    stamp = tarinfo.mtime
    try:
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001730
1731 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member that is returned by reading the archive is also
       appended to self.members. ReadError is raised if the very first
       block of the archive cannot be parsed.
    """
    self._check("ra")
    # A member that was stored in self.firstmember is handed out once
    # before the archive is read any further.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # We shouldn't rely on this checksum, because some tar programs
            # calculate it differently and it is merely validating the
            # header block. We could just as well skip this part, which would
            # have a slight effect on performance...
            if tarinfo.chksum not in calc_chksums(buf):
                self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # With the offset still at 0 the file is reported as
                # unreadable; otherwise the failure ends the member list.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        # A member was processed without error, leave the loop.
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
                                         tarinfo.name))

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1798
1799 #--------------------------------------------------------------------------
Thomas Wouters477c8d52006-05-27 19:21:47 +00001800 # The following are methods that are called depending on the type of a
1801 # member. The entry point is proc_member() which is called with a TarInfo
1802 # object created from the header block from the current offset. The
1803 # proc_member() method can be overridden in a subclass to add custom
1804 # proc_*() methods. A proc_*() method MUST implement the following
1805 # operations:
1806 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1807 # if there is data that follows.
1808 # 2. Set self.offset to the position where the next member's header will
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001809 # begin.
Thomas Wouters477c8d52006-05-27 19:21:47 +00001810 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method matching its type.

    GNU longname/longlink headers are handled by proc_gnulong(),
    GNU sparse headers by proc_sparse(), and every other type by
    proc_builtin(). The result of the chosen method is returned.
    """
    member_type = tarinfo.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif member_type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
1821
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type
       (which is treated like a regular file).

       Records the current offset as the start of the member's data
       and, when data blocks follow, moves self.offset past them.
       Returns tarinfo.
    """
    data_start = self.offset
    tarinfo.offset_data = data_start

    # Only regular files and unsupported types carry data blocks
    # that have to be skipped.
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        self.offset = data_start + self._block(tarinfo.size)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001831
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname or longlink
       member.

       The data blocks following the header carry the long string.
       The header after them is parsed and processed as a normal
       member, then patched with the long name or link name.
       Returns the TarInfo object of that following member.
    """
    # Read the data blocks that contain the long string.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longstring = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    kind = tarinfo.type
    if kind == GNUTYPE_LONGNAME:
        member.name = longstring.rstrip(NUL)
    elif kind == GNUTYPE_LONGLINK:
        member.linkname = longstring.rstrip(NUL)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001860
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds the list of data and hole sections described by the
       sparse structs in the header block and in any extension
       blocks, and stores it in tarinfo.sparse. Sets
       tarinfo.offset_data, moves self.offset past the stored data,
       replaces tarinfo.size with the original file size read from
       the header and clears tarinfo.prefix. Returns tarinfo.

       Raises ValueError if an extension header block cannot be read
       completely (truncated archive).
    """
    sp = _ringbuffer()

    def add_structs(block, pos, count, lastpos, realpos):
        """Append the sections for up to count 24-byte sparse structs
           found in block starting at pos; parsing stops at the first
           struct that is not a valid number pair.
           Return the updated (lastpos, realpos).
        """
        for _ in range(count):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            # A gap between the previous section and this one is a hole.
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        return lastpos, realpos

    # There are 4 possible sparse structs in the
    # first header.
    buf = tarinfo.buf
    lastpos, realpos = add_structs(buf, 386, 4, 0, 0)

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        # A short read would otherwise fail with an IndexError on
        # the isextended flag below; report it as a bad header.
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated sparse extension header")
        self.offset += BLOCKSIZE
        lastpos, realpos = add_structs(buf, 0, 21, lastpos, realpos)
        isextended = ord(buf[504])

    # The rest of the file up to its original size is a hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # tarinfo.size still holds the stored size here; it is needed to
    # skip the data blocks before it is replaced by the original size.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    # Clear the prefix field so that it is not used
    # as a pathname in next().
    tarinfo.prefix = ""

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001921
1922 #--------------------------------------------------------------------------
1923 # Little helper methods:
1924
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    remainder = count % BLOCKSIZE
    if not remainder:
        # Already a whole number of blocks.
        return count
    return count - remainder + BLOCKSIZE
1933
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name, searching from bottom to top.

       If tarinfo is given, only the members that precede it are
       searched. Returns the matching member, or None if there is
       no match.
    """
    # Ensure that all members have been loaded.
    candidates = self.getmembers()

    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001949
def _load(self):
    """Read through the whole archive by calling next() until it
       returns None, then mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
1959
def _check(self, mode=None):
    """Check that the TarFile is still open and, if mode is given,
       that the TarFile's own mode is contained in it.

       Raises IOError if either check fails.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001968
def __iter__(self):
    """Provide an iterator object.

       While the archive has not been read completely a TarIter is
       returned; afterwards the member list is iterated directly.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1976
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

       The member consists of an extended tar header whose size
       field holds the length of the null terminated name, followed
       by the data blocks containing that name, padded with NULs to
       a full block. self.offset is advanced by everything written.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    out = self.fileobj

    # write extended header
    out.write(header.tobuf())
    self.offset += BLOCKSIZE

    # write name blocks, filling the last one up with NULs
    out.write(payload)
    padding = -header.size % BLOCKSIZE
    if padding:
        out.write(NUL * padding)
    self.offset += header.size + padding
2001
def _dbg(self, level, msg):
    """Write the debugging message msg, followed by a newline, to
       sys.stderr if level does not exceed self.debug.
    """
    if level > self.debug:
        return
    sys.stderr.write("%s\n" % (msg,))
2007# class TarFile
2008
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object that iterates over tarfile.
        """
        self.tarfile = tarfile
        # Number of members handed out so far.
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def next(self):
        """Return the next member of the TarFile.

           While the TarFile is not completely loaded its next()
           method is used; when that is exhausted the TarFile is
           marked as _loaded. Raises StopIteration at the end.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Once the
        # archive is loaded, continue from the member list instead.
        if archive._loaded:
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2044
2045# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.

       A section starts at offset and is size bytes long; the `in`
       operator tests whether a position lies inside it.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        start = self.offset
        return start <= offset and offset < start + self.size
2054
class _data(_section):
    """Represent a data section in a sparse file.

       In addition to the offset and size of a _section it stores
       realpos, the value passed as the third argument.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2061
class _hole(_section):
    """Represent a hole section in a sparse file.

       It adds nothing to _section; the class only marks the
       section as a hole.
    """
2066
class _ringbuffer(list):
    """List of sections that is searched as a ring.

       find() starts at the position of the previous hit (self.idx)
       and wraps around at the end, instead of always starting at
       the first element.
    """
    def __init__(self):
        list.__init__(self)
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, starting at self.idx and wrapping
           around, for which `offset in item` is true, and remember
           its index. Return None if no item matches or if the
           buffer is empty.
        """
        count = len(self)
        if not count:
            # Nothing to search; indexing would raise IndexError.
            return None

        start = self.idx
        for step in range(count):
            idx = (start + step) % count
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
        # End of File
        return None
2087
2088#---------------------------------------------
2089# zipfile compatible TarFile class
2090#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzip compressed
           (TAR_GZIPPED) tar archive. Raises ValueError for any other
           compression constant. In read mode every member gets the
           zipfile style attributes filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [m.name for m in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents using TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called name."""
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive as the member zinfo.

           zinfo supplies filename and date_time; the name, size and
           mtime attributes are set on it before it is passed to
           TarFile.addfile(). The size is taken from the data itself
           rather than from zinfo.file_size, so the size recorded on
           the member always matches the data that is written.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                # Neither module is available; use the io equivalent.
                from io import BytesIO as StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = len(bytes)
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2137#class TarFileCompat
2138
2139#--------------------
2140# exported functions
2141#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # open is the module level alias for TarFile.open defined below.
    try:
        open(name).close()
    except TarError:
        return False
    return True

open = TarFile.open