blob: 56cce0331c8754607eab75c78ad95935633ed024 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
Jack Jansencfc49022003-03-07 13:37:32 +000053if sys.platform == 'mac':
54 # This module needs work for MacOS9, especially in the area of pathname
55 # handling. In many places it is assumed a simple substitution of / by the
56 # local os.path.sep is good enough to convert pathnames, but this does not
57 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
58 raise ImportError, "tarfile does not work for platform==mac"
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
71NUL = "\0" # the null character
72BLOCKSIZE = 512 # length of processing blocks
73RECORDSIZE = BLOCKSIZE * 20 # length of records
74MAGIC = "ustar" # magic tar string
75VERSION = "00" # version number
76
77LENGTH_NAME = 100 # maximum length of a filename
78LENGTH_LINK = 100 # maximum length of a linkname
79LENGTH_PREFIX = 155 # maximum length of the prefix field
80MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
81
82REGTYPE = "0" # regular file
83AREGTYPE = "\0" # regular file
84LNKTYPE = "1" # link (inside tarfile)
85SYMTYPE = "2" # symbolic link
86CHRTYPE = "3" # character special device
87BLKTYPE = "4" # block special device
88DIRTYPE = "5" # directory
89FIFOTYPE = "6" # fifo special device
90CONTTYPE = "7" # contiguous file
91
92GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
93GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
94GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
99SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
101 CONTTYPE, CHRTYPE, BLKTYPE,
102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
103 GNUTYPE_SPARSE)
104
105REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
111S_IFLNK = 0120000 # symbolic link
112S_IFREG = 0100000 # regular file
113S_IFBLK = 0060000 # block device
114S_IFDIR = 0040000 # directory
115S_IFCHR = 0020000 # character device
116S_IFIFO = 0010000 # fifo
117
118TSUID = 04000 # set UID on execution
119TSGID = 02000 # set GID on execution
120TSVTX = 01000 # reserved
121
122TUREAD = 0400 # read by owner
123TUWRITE = 0200 # write by owner
124TUEXEC = 0100 # execute/search by owner
125TGREAD = 0040 # read by group
126TGWRITE = 0020 # write by group
127TGEXEC = 0010 # execute/search by group
128TOREAD = 0004 # read by other
129TOWRITE = 0002 # write by other
130TOEXEC = 0001 # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
135def nts(s):
136 """Convert a null-terminated string buffer to a python string.
137 """
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000138 return s.rstrip(NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
140def calc_chksum(buf):
141 """Calculate the checksum for a member's header. It's a simple addition
142 of all bytes, treating the chksum field as if filled with spaces.
143 buf is a 512 byte long string buffer which holds the header.
144 """
145 chk = 256 # chksum field is treated as blanks,
146 # so the initial value is 8 * ord(" ")
147 for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum
148 for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum
149 return chk
150
151def copyfileobj(src, dst, length=None):
152 """Copy length bytes from fileobj src to fileobj dst.
153 If length is None, copy the entire content.
154 """
155 if length == 0:
156 return
157 if length is None:
158 shutil.copyfileobj(src, dst)
159 return
160
161 BUFSIZE = 16 * 1024
162 blocks, remainder = divmod(length, BUFSIZE)
163 for b in xrange(blocks):
164 buf = src.read(BUFSIZE)
165 if len(buf) < BUFSIZE:
166 raise IOError, "end of file reached"
167 dst.write(buf)
168
169 if remainder != 0:
170 buf = src.read(remainder)
171 if len(buf) < remainder:
172 raise IOError, "end of file reached"
173 dst.write(buf)
174 return
175
176filemode_table = (
Andrew M. Kuchling8bc462f2004-10-20 11:48:42 +0000177 ((S_IFLNK, "l"),
178 (S_IFREG, "-"),
179 (S_IFBLK, "b"),
180 (S_IFDIR, "d"),
181 (S_IFCHR, "c"),
182 (S_IFIFO, "p")),
183
184 ((TUREAD, "r"),),
185 ((TUWRITE, "w"),),
186 ((TUEXEC|TSUID, "s"),
187 (TSUID, "S"),
188 (TUEXEC, "x")),
189
190 ((TGREAD, "r"),),
191 ((TGWRITE, "w"),),
192 ((TGEXEC|TSGID, "s"),
193 (TSGID, "S"),
194 (TGEXEC, "x")),
195
196 ((TOREAD, "r"),),
197 ((TOWRITE, "w"),),
198 ((TOEXEC|TSVTX, "t"),
199 (TSVTX, "T"),
200 (TOEXEC, "x"))
201)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000202
203def filemode(mode):
204 """Convert a file's mode to a string of the form
205 -rwxrwxrwx.
206 Used by TarFile.list()
207 """
Andrew M. Kuchling8bc462f2004-10-20 11:48:42 +0000208 perm = []
209 for table in filemode_table:
210 for bit, char in table:
211 if mode & bit == bit:
212 perm.append(char)
213 break
214 else:
215 perm.append("-")
216 return "".join(perm)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000217
218if os.sep != "/":
219 normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
220else:
221 normpath = os.path.normpath
222
223class TarError(Exception):
224 """Base exception."""
225 pass
226class ExtractError(TarError):
227 """General exception for extract errors."""
228 pass
229class ReadError(TarError):
230 """Exception for unreadble tar archives."""
231 pass
232class CompressionError(TarError):
233 """Exception for unavailable compression methods."""
234 pass
235class StreamError(TarError):
236 """Exception for unsupported operations on stream-like TarFiles."""
237 pass
238
239#---------------------------
240# internal stream interface
241#---------------------------
242class _LowLevelFile:
243 """Low-level file object. Supports reading and writing.
244 It is used instead of a regular file object for streaming
245 access.
246 """
247
248 def __init__(self, name, mode):
249 mode = {
250 "r": os.O_RDONLY,
251 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
252 }[mode]
253 if hasattr(os, "O_BINARY"):
254 mode |= os.O_BINARY
255 self.fd = os.open(name, mode)
256
257 def close(self):
258 os.close(self.fd)
259
260 def read(self, size):
261 return os.read(self.fd, size)
262
263 def write(self, s):
264 os.write(self.fd, s)
265
266class _Stream:
267 """Class that serves as an adapter between TarFile and
268 a stream-like object. The stream-like object only
269 needs to have a read() or write() method and is accessed
270 blockwise. Use of gzip or bzip2 compression is possible.
271 A stream-like object could be for example: sys.stdin,
272 sys.stdout, a socket, a tape device etc.
273
274 _Stream is intended to be used only internally.
275 """
276
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000277 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000278 """Construct a _Stream object.
279 """
280 self._extfileobj = True
281 if fileobj is None:
282 fileobj = _LowLevelFile(name, mode)
283 self._extfileobj = False
284
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000285 if comptype == '*':
286 # Enable transparent compression detection for the
287 # stream interface
288 fileobj = _StreamProxy(fileobj)
289 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000290
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000291 self.name = name or ""
292 self.mode = mode
293 self.comptype = comptype
294 self.fileobj = fileobj
295 self.bufsize = bufsize
296 self.buf = ""
297 self.pos = 0L
298 self.closed = False
299
300 if comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000301 try:
302 import zlib
303 except ImportError:
304 raise CompressionError, "zlib module is not available"
305 self.zlib = zlib
306 self.crc = zlib.crc32("")
307 if mode == "r":
308 self._init_read_gz()
309 else:
310 self._init_write_gz()
311
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000312 if comptype == "bz2":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000313 try:
314 import bz2
315 except ImportError:
316 raise CompressionError, "bz2 module is not available"
317 if mode == "r":
318 self.dbuf = ""
319 self.cmp = bz2.BZ2Decompressor()
320 else:
321 self.cmp = bz2.BZ2Compressor()
322
323 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000324 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000325 self.close()
326
327 def _init_write_gz(self):
328 """Initialize for writing with gzip compression.
329 """
330 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
331 -self.zlib.MAX_WBITS,
332 self.zlib.DEF_MEM_LEVEL,
333 0)
334 timestamp = struct.pack("<L", long(time.time()))
335 self.__write("\037\213\010\010%s\002\377" % timestamp)
336 if self.name.endswith(".gz"):
337 self.name = self.name[:-3]
338 self.__write(self.name + NUL)
339
340 def write(self, s):
341 """Write string s to the stream.
342 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000343 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000344 self.crc = self.zlib.crc32(s, self.crc)
345 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000346 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000347 s = self.cmp.compress(s)
348 self.__write(s)
349
350 def __write(self, s):
351 """Write string s to the stream if a whole new block
352 is ready to be written.
353 """
354 self.buf += s
355 while len(self.buf) > self.bufsize:
356 self.fileobj.write(self.buf[:self.bufsize])
357 self.buf = self.buf[self.bufsize:]
358
359 def close(self):
360 """Close the _Stream object. No operation should be
361 done on it afterwards.
362 """
363 if self.closed:
364 return
365
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000366 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000367 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000368
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000369 if self.mode == "w" and self.buf:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000370 blocks, remainder = divmod(len(self.buf), self.bufsize)
371 if remainder > 0:
372 self.buf += NUL * (self.bufsize - remainder)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000373 self.fileobj.write(self.buf)
374 self.buf = ""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000375 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000376 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000377 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000378
379 if not self._extfileobj:
380 self.fileobj.close()
381
382 self.closed = True
383
384 def _init_read_gz(self):
385 """Initialize for reading a gzip compressed fileobj.
386 """
387 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
388 self.dbuf = ""
389
390 # taken from gzip.GzipFile with some alterations
391 if self.__read(2) != "\037\213":
392 raise ReadError, "not a gzip file"
393 if self.__read(1) != "\010":
394 raise CompressionError, "unsupported compression method"
395
396 flag = ord(self.__read(1))
397 self.__read(6)
398
399 if flag & 4:
400 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
401 self.read(xlen)
402 if flag & 8:
403 while True:
404 s = self.__read(1)
405 if not s or s == NUL:
406 break
407 if flag & 16:
408 while True:
409 s = self.__read(1)
410 if not s or s == NUL:
411 break
412 if flag & 2:
413 self.__read(2)
414
415 def tell(self):
416 """Return the stream's file pointer position.
417 """
418 return self.pos
419
420 def seek(self, pos=0):
421 """Set the stream's file pointer to pos. Negative seeking
422 is forbidden.
423 """
424 if pos - self.pos >= 0:
425 blocks, remainder = divmod(pos - self.pos, self.bufsize)
426 for i in xrange(blocks):
427 self.read(self.bufsize)
428 self.read(remainder)
429 else:
430 raise StreamError, "seeking backwards is not allowed"
431 return self.pos
432
433 def read(self, size=None):
434 """Return the next size number of bytes from the stream.
435 If size is not defined, return all bytes of the stream
436 up to EOF.
437 """
438 if size is None:
439 t = []
440 while True:
441 buf = self._read(self.bufsize)
442 if not buf:
443 break
444 t.append(buf)
445 buf = "".join(t)
446 else:
447 buf = self._read(size)
448 self.pos += len(buf)
449 return buf
450
451 def _read(self, size):
452 """Return size bytes from the stream.
453 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000454 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000455 return self.__read(size)
456
457 c = len(self.dbuf)
458 t = [self.dbuf]
459 while c < size:
460 buf = self.__read(self.bufsize)
461 if not buf:
462 break
463 buf = self.cmp.decompress(buf)
464 t.append(buf)
465 c += len(buf)
466 t = "".join(t)
467 self.dbuf = t[size:]
468 return t[:size]
469
470 def __read(self, size):
471 """Return size bytes from stream. If internal buffer is empty,
472 read another block from the stream.
473 """
474 c = len(self.buf)
475 t = [self.buf]
476 while c < size:
477 buf = self.fileobj.read(self.bufsize)
478 if not buf:
479 break
480 t.append(buf)
481 c += len(buf)
482 t = "".join(t)
483 self.buf = t[size:]
484 return t[:size]
485# class _Stream
486
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000487class _StreamProxy(object):
488 """Small proxy class that enables transparent compression
489 detection for the Stream interface (mode 'r|*').
490 """
491
492 def __init__(self, fileobj):
493 self.fileobj = fileobj
494 self.buf = self.fileobj.read(BLOCKSIZE)
495
496 def read(self, size):
497 self.read = self.fileobj.read
498 return self.buf
499
500 def getcomptype(self):
501 if self.buf.startswith("\037\213\010"):
502 return "gz"
503 if self.buf.startswith("BZh91"):
504 return "bz2"
505 return "tar"
506
507 def close(self):
508 self.fileobj.close()
509# class StreamProxy
510
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000511#------------------------
512# Extraction file object
513#------------------------
514class ExFileObject(object):
515 """File-like object for reading an archive member.
516 Is returned by TarFile.extractfile(). Support for
517 sparse files included.
518 """
519
520 def __init__(self, tarfile, tarinfo):
521 self.fileobj = tarfile.fileobj
522 self.name = tarinfo.name
523 self.mode = "r"
524 self.closed = False
525 self.offset = tarinfo.offset_data
526 self.size = tarinfo.size
527 self.pos = 0L
528 self.linebuffer = ""
529 if tarinfo.issparse():
530 self.sparse = tarinfo.sparse
531 self.read = self._readsparse
532 else:
533 self.read = self._readnormal
534
535 def __read(self, size):
536 """Overloadable read method.
537 """
538 return self.fileobj.read(size)
539
540 def readline(self, size=-1):
541 """Read a line with approx. size. If size is negative,
542 read a whole line. readline() and read() must not
543 be mixed up (!).
544 """
545 if size < 0:
546 size = sys.maxint
547
548 nl = self.linebuffer.find("\n")
549 if nl >= 0:
550 nl = min(nl, size)
551 else:
552 size -= len(self.linebuffer)
Martin v. Löwisc11d6f12004-08-25 10:52:58 +0000553 while (nl < 0 and size > 0):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000554 buf = self.read(min(size, 100))
555 if not buf:
556 break
557 self.linebuffer += buf
558 size -= len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000559 nl = self.linebuffer.find("\n")
560 if nl == -1:
561 s = self.linebuffer
562 self.linebuffer = ""
563 return s
564 buf = self.linebuffer[:nl]
565 self.linebuffer = self.linebuffer[nl + 1:]
566 while buf[-1:] == "\r":
567 buf = buf[:-1]
568 return buf + "\n"
569
570 def readlines(self):
571 """Return a list with all (following) lines.
572 """
573 result = []
574 while True:
575 line = self.readline()
576 if not line: break
577 result.append(line)
578 return result
579
580 def _readnormal(self, size=None):
581 """Read operation for regular files.
582 """
583 if self.closed:
584 raise ValueError, "file is closed"
585 self.fileobj.seek(self.offset + self.pos)
586 bytesleft = self.size - self.pos
587 if size is None:
588 bytestoread = bytesleft
589 else:
590 bytestoread = min(size, bytesleft)
591 self.pos += bytestoread
592 return self.__read(bytestoread)
593
594 def _readsparse(self, size=None):
595 """Read operation for sparse files.
596 """
597 if self.closed:
598 raise ValueError, "file is closed"
599
600 if size is None:
601 size = self.size - self.pos
602
603 data = []
604 while size > 0:
605 buf = self._readsparsesection(size)
606 if not buf:
607 break
608 size -= len(buf)
609 data.append(buf)
610 return "".join(data)
611
612 def _readsparsesection(self, size):
613 """Read a single section of a sparse file.
614 """
615 section = self.sparse.find(self.pos)
616
617 if section is None:
618 return ""
619
620 toread = min(size, section.offset + section.size - self.pos)
621 if isinstance(section, _data):
622 realpos = section.realpos + self.pos - section.offset
623 self.pos += toread
624 self.fileobj.seek(self.offset + realpos)
625 return self.__read(toread)
626 else:
627 self.pos += toread
628 return NUL * toread
629
630 def tell(self):
631 """Return the current file position.
632 """
633 return self.pos
634
635 def seek(self, pos, whence=0):
636 """Seek to a position in the file.
637 """
638 self.linebuffer = ""
639 if whence == 0:
640 self.pos = min(max(pos, 0), self.size)
641 if whence == 1:
642 if pos < 0:
643 self.pos = max(self.pos + pos, 0)
644 else:
645 self.pos = min(self.pos + pos, self.size)
646 if whence == 2:
647 self.pos = max(min(self.size + pos, self.size), 0)
648
649 def close(self):
650 """Close the file object.
651 """
652 self.closed = True
Martin v. Löwisdf241532005-03-03 08:17:42 +0000653
654 def __iter__(self):
655 """Get an iterator over the file object.
656 """
657 if self.closed:
658 raise ValueError("I/O operation on closed file")
659 return self
660
661 def next(self):
662 """Get the next item from the file iterator.
663 """
664 result = self.readline()
665 if not result:
666 raise StopIteration
667 return result
668
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000669#class ExFileObject
670
671#------------------
672# Exported Classes
673#------------------
674class TarInfo(object):
675 """Informational class which holds the details about an
676 archive member given by a tar header block.
677 TarInfo objects are returned by TarFile.getmember(),
678 TarFile.getmembers() and TarFile.gettarinfo() and are
679 usually created internally.
680 """
681
682 def __init__(self, name=""):
683 """Construct a TarInfo object. name is the optional name
684 of the member.
685 """
686
687 self.name = name # member name (dirnames must end with '/')
688 self.mode = 0666 # file permissions
689 self.uid = 0 # user id
690 self.gid = 0 # group id
691 self.size = 0 # file size
692 self.mtime = 0 # modification time
693 self.chksum = 0 # header checksum
694 self.type = REGTYPE # member type
695 self.linkname = "" # link name
696 self.uname = "user" # user name
697 self.gname = "group" # group name
698 self.devmajor = 0 #-
699 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
700 self.prefix = "" # prefix to filename or holding information
701 # about sparse files
702
703 self.offset = 0 # the tar header starts here
704 self.offset_data = 0 # the file's data starts here
705
706 def __repr__(self):
707 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
708
Guido van Rossum75b64e62005-01-16 00:16:11 +0000709 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710 def frombuf(cls, buf):
711 """Construct a TarInfo object from a 512 byte string buffer.
712 """
713 tarinfo = cls()
Neal Norwitzd96d1012004-07-20 22:23:02 +0000714 tarinfo.name = nts(buf[0:100])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000715 tarinfo.mode = int(buf[100:108], 8)
716 tarinfo.uid = int(buf[108:116],8)
717 tarinfo.gid = int(buf[116:124],8)
Neal Norwitzd96d1012004-07-20 22:23:02 +0000718
719 # There are two possible codings for the size field we
720 # have to discriminate, see comment in tobuf() below.
721 if buf[124] != chr(0200):
722 tarinfo.size = long(buf[124:136], 8)
723 else:
724 tarinfo.size = 0L
725 for i in range(11):
726 tarinfo.size <<= 8
727 tarinfo.size += ord(buf[125 + i])
728
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000729 tarinfo.mtime = long(buf[136:148], 8)
730 tarinfo.chksum = int(buf[148:156], 8)
731 tarinfo.type = buf[156:157]
732 tarinfo.linkname = nts(buf[157:257])
733 tarinfo.uname = nts(buf[265:297])
734 tarinfo.gname = nts(buf[297:329])
735 try:
736 tarinfo.devmajor = int(buf[329:337], 8)
737 tarinfo.devminor = int(buf[337:345], 8)
738 except ValueError:
739 tarinfo.devmajor = tarinfo.devmajor = 0
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000740 tarinfo.prefix = buf[345:500]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000741
742 # The prefix field is used for filenames > 100 in
743 # the POSIX standard.
Jack Jansen2b4b5a52003-04-22 22:03:11 +0000744 # name = prefix + '/' + name
745 if tarinfo.type != GNUTYPE_SPARSE:
746 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000747
748 # Directory names should have a '/' at the end.
749 if tarinfo.isdir() and tarinfo.name[-1:] != "/":
750 tarinfo.name += "/"
751 return tarinfo
752
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000753 def tobuf(self):
754 """Return a tar header block as a 512 byte string.
755 """
Neal Norwitzd96d1012004-07-20 22:23:02 +0000756 # Prefer the size to be encoded as 11 octal ascii digits
757 # which is the most portable. If the size exceeds this
758 # limit (>= 8 GB), encode it as an 88-bit value which is
759 # a GNU tar feature.
760 if self.size <= MAXSIZE_MEMBER:
761 size = "%011o" % self.size
762 else:
763 s = self.size
764 size = ""
765 for i in range(11):
766 size = chr(s & 0377) + size
767 s >>= 8
768 size = chr(0200) + size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000769
770 # The following code was contributed by Detlef Lannert.
771 parts = []
772 for value, fieldsize in (
Neal Norwitzd96d1012004-07-20 22:23:02 +0000773 (self.name, 100),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000774 ("%07o" % (self.mode & 07777), 8),
775 ("%07o" % self.uid, 8),
776 ("%07o" % self.gid, 8),
Neal Norwitzd96d1012004-07-20 22:23:02 +0000777 (size, 12),
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000778 ("%011o" % self.mtime, 12),
779 (" ", 8),
780 (self.type, 1),
781 (self.linkname, 100),
782 (MAGIC, 6),
783 (VERSION, 2),
784 (self.uname, 32),
785 (self.gname, 32),
786 ("%07o" % self.devmajor, 8),
787 ("%07o" % self.devminor, 8),
788 (self.prefix, 155)
789 ):
790 l = len(value)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +0000791 parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000792
793 buf = "".join(parts)
794 chksum = calc_chksum(buf)
795 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
796 buf += (BLOCKSIZE - len(buf)) * NUL
797 self.buf = buf
798 return buf
799
800 def isreg(self):
801 return self.type in REGULAR_TYPES
802 def isfile(self):
803 return self.isreg()
804 def isdir(self):
805 return self.type == DIRTYPE
806 def issym(self):
807 return self.type == SYMTYPE
808 def islnk(self):
809 return self.type == LNKTYPE
810 def ischr(self):
811 return self.type == CHRTYPE
812 def isblk(self):
813 return self.type == BLKTYPE
814 def isfifo(self):
815 return self.type == FIFOTYPE
816 def issparse(self):
817 return self.type == GNUTYPE_SPARSE
818 def isdev(self):
819 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
820# class TarInfo
821
822class TarFile(object):
823 """The TarFile Class provides an interface to tar archives.
824 """
825
826 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
827
828 dereference = False # If true, add content of linked file to the
829 # tar file, else the link.
830
831 ignore_zeros = False # If true, skips empty or invalid blocks and
832 # continues processing.
833
834 errorlevel = 0 # If 0, fatal errors only appear in debug
835 # messages (if debug >= 0). If > 0, errors
836 # are passed to the caller as exceptions.
837
Martin v. Löwis75b9da42004-08-18 13:57:44 +0000838 posix = False # If True, generates POSIX.1-1990-compliant
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000839 # archives (no GNU extensions!)
840
841 fileobject = ExFileObject
842
843 def __init__(self, name=None, mode="r", fileobj=None):
844 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
845 read from an existing archive, 'a' to append data to an existing
846 file or 'w' to create a new file overwriting an existing one. `mode'
847 defaults to 'r'.
848 If `fileobj' is given, it is used for reading or writing data. If it
849 can be determined, `mode' is overridden by `fileobj's mode.
850 `fileobj' is not closed, when TarFile is closed.
851 """
852 self.name = name
853
854 if len(mode) > 1 or mode not in "raw":
855 raise ValueError, "mode must be 'r', 'a' or 'w'"
856 self._mode = mode
857 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
858
859 if not fileobj:
860 fileobj = file(self.name, self.mode)
861 self._extfileobj = False
862 else:
863 if self.name is None and hasattr(fileobj, "name"):
864 self.name = fileobj.name
865 if hasattr(fileobj, "mode"):
866 self.mode = fileobj.mode
867 self._extfileobj = True
868 self.fileobj = fileobj
869
870 # Init datastructures
871 self.closed = False
872 self.members = [] # list of members as TarInfo objects
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000873 self._loaded = False # flag if all members have been read
874 self.offset = 0L # current position in the archive file
875 self.inodes = {} # dictionary caching the inodes of
876 # archive members already added
877
878 if self._mode == "r":
879 self.firstmember = None
880 self.firstmember = self.next()
881
882 if self._mode == "a":
883 # Move to the end of the archive,
884 # before the first empty block.
885 self.firstmember = None
886 while True:
887 try:
888 tarinfo = self.next()
889 except ReadError:
890 self.fileobj.seek(0)
891 break
892 if tarinfo is None:
893 self.fileobj.seek(- BLOCKSIZE, 1)
894 break
895
896 if self._mode in "aw":
897 self._loaded = True
898
899 #--------------------------------------------------------------------------
900 # Below are the classmethods which act as alternate constructors to the
901 # TarFile class. The open() method is the only one that is needed for
902 # public use; it is the "super"-constructor and is able to select an
903 # adequate "sub"-constructor for a particular compression using the mapping
904 # from OPEN_METH.
905 #
906 # This concept allows one to subclass TarFile without losing the comfort of
907 # the super-constructor. A sub-constructor is registered and made available
908 # by adding it to the mapping in OPEN_METH.
909
Guido van Rossum75b64e62005-01-16 00:16:11 +0000910 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000911 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
912 """Open a tar archive for reading, writing or appending. Return
913 an appropriate TarFile class.
914
915 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000916 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000917 'r:' open for reading exclusively uncompressed
918 'r:gz' open for reading with gzip compression
919 'r:bz2' open for reading with bzip2 compression
920 'a' or 'a:' open for appending
921 'w' or 'w:' open for writing without compression
922 'w:gz' open for writing with gzip compression
923 'w:bz2' open for writing with bzip2 compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000924
925 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000926 'r|' open an uncompressed stream of tar blocks for reading
927 'r|gz' open a gzip compressed stream of tar blocks
928 'r|bz2' open a bzip2 compressed stream of tar blocks
929 'w|' open an uncompressed stream for writing
930 'w|gz' open a gzip compressed stream for writing
931 'w|bz2' open a bzip2 compressed stream for writing
932 """
933
934 if not name and not fileobj:
935 raise ValueError, "nothing to open"
936
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000937 if mode in ("r", "r:*"):
938 # Find out which *open() is appropriate for opening the file.
939 for comptype in cls.OPEN_METH:
940 func = getattr(cls, cls.OPEN_METH[comptype])
941 try:
942 return func(name, "r", fileobj)
943 except (ReadError, CompressionError):
944 continue
945 raise ReadError, "file could not be opened successfully"
946
947 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000948 filemode, comptype = mode.split(":", 1)
949 filemode = filemode or "r"
950 comptype = comptype or "tar"
951
952 # Select the *open() function according to
953 # given compression.
954 if comptype in cls.OPEN_METH:
955 func = getattr(cls, cls.OPEN_METH[comptype])
956 else:
957 raise CompressionError, "unknown compression type %r" % comptype
958 return func(name, filemode, fileobj)
959
960 elif "|" in mode:
961 filemode, comptype = mode.split("|", 1)
962 filemode = filemode or "r"
963 comptype = comptype or "tar"
964
965 if filemode not in "rw":
966 raise ValueError, "mode must be 'r' or 'w'"
967
968 t = cls(name, filemode,
969 _Stream(name, filemode, comptype, fileobj, bufsize))
970 t._extfileobj = False
971 return t
972
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000973 elif mode in "aw":
974 return cls.taropen(name, mode, fileobj)
975
976 raise ValueError, "undiscernible mode"
977
Guido van Rossum75b64e62005-01-16 00:16:11 +0000978 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000979 def taropen(cls, name, mode="r", fileobj=None):
980 """Open uncompressed tar archive name for reading or writing.
981 """
982 if len(mode) > 1 or mode not in "raw":
983 raise ValueError, "mode must be 'r', 'a' or 'w'"
984 return cls(name, mode, fileobj)
985
Guido van Rossum75b64e62005-01-16 00:16:11 +0000986 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000987 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
988 """Open gzip compressed tar archive name for reading or writing.
989 Appending is not allowed.
990 """
991 if len(mode) > 1 or mode not in "rw":
992 raise ValueError, "mode must be 'r' or 'w'"
993
994 try:
995 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +0000996 gzip.GzipFile
997 except (ImportError, AttributeError):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000998 raise CompressionError, "gzip module is not available"
999
1000 pre, ext = os.path.splitext(name)
1001 pre = os.path.basename(pre)
1002 if ext == ".tgz":
1003 ext = ".tar"
1004 if ext == ".gz":
1005 ext = ""
1006 tarname = pre + ext
1007
1008 if fileobj is None:
1009 fileobj = file(name, mode + "b")
1010
1011 if mode != "r":
1012 name = tarname
1013
1014 try:
1015 t = cls.taropen(tarname, mode,
1016 gzip.GzipFile(name, mode, compresslevel, fileobj)
1017 )
1018 except IOError:
1019 raise ReadError, "not a gzip file"
1020 t._extfileobj = False
1021 return t
1022
Guido van Rossum75b64e62005-01-16 00:16:11 +00001023 @classmethod
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001024 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
1025 """Open bzip2 compressed tar archive name for reading or writing.
1026 Appending is not allowed.
1027 """
1028 if len(mode) > 1 or mode not in "rw":
1029 raise ValueError, "mode must be 'r' or 'w'."
1030
1031 try:
1032 import bz2
1033 except ImportError:
1034 raise CompressionError, "bz2 module is not available"
1035
1036 pre, ext = os.path.splitext(name)
1037 pre = os.path.basename(pre)
1038 if ext == ".tbz2":
1039 ext = ".tar"
1040 if ext == ".bz2":
1041 ext = ""
1042 tarname = pre + ext
1043
1044 if fileobj is not None:
1045 raise ValueError, "no support for external file objects"
1046
1047 try:
1048 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
1049 except IOError:
1050 raise ReadError, "not a bzip2 file"
1051 t._extfileobj = False
1052 return t
1053
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001054 # All *open() methods are registered here.
1055 OPEN_METH = {
1056 "tar": "taropen", # uncompressed tar
1057 "gz": "gzopen", # gzip compressed tar
1058 "bz2": "bz2open" # bzip2 compressed tar
1059 }
1060
1061 #--------------------------------------------------------------------------
1062 # The public methods which TarFile provides:
1063
1064 def close(self):
1065 """Close the TarFile. In write-mode, two finishing zero blocks are
1066 appended to the archive.
1067 """
1068 if self.closed:
1069 return
1070
1071 if self._mode in "aw":
1072 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1073 self.offset += (BLOCKSIZE * 2)
1074 # fill up the end with zero-blocks
1075 # (like option -b20 for tar does)
1076 blocks, remainder = divmod(self.offset, RECORDSIZE)
1077 if remainder > 0:
1078 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1079
1080 if not self._extfileobj:
1081 self.fileobj.close()
1082 self.closed = True
1083
1084 def getmember(self, name):
1085 """Return a TarInfo object for member `name'. If `name' can not be
1086 found in the archive, KeyError is raised. If a member occurs more
1087 than once in the archive, its last occurence is assumed to be the
1088 most up-to-date version.
1089 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001090 tarinfo = self._getmember(name)
1091 if tarinfo is None:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001092 raise KeyError, "filename %r not found" % name
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001093 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001094
1095 def getmembers(self):
1096 """Return the members of the archive as a list of TarInfo objects. The
1097 list has the same order as the members in the archive.
1098 """
1099 self._check()
1100 if not self._loaded: # if we want to obtain a list of
1101 self._load() # all members, we first have to
1102 # scan the whole archive.
1103 return self.members
1104
1105 def getnames(self):
1106 """Return the members of the archive as a list of their names. It has
1107 the same order as the list returned by getmembers().
1108 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001109 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001110
1111 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1112 """Create a TarInfo object for either the file `name' or the file
1113 object `fileobj' (using os.fstat on its file descriptor). You can
1114 modify some of the TarInfo's attributes before you add it using
1115 addfile(). If given, `arcname' specifies an alternative name for the
1116 file in the archive.
1117 """
1118 self._check("aw")
1119
1120 # When fileobj is given, replace name by
1121 # fileobj's real name.
1122 if fileobj is not None:
1123 name = fileobj.name
1124
1125 # Building the name of the member in the archive.
1126 # Backward slashes are converted to forward slashes,
1127 # Absolute paths are turned to relative paths.
1128 if arcname is None:
1129 arcname = name
1130 arcname = normpath(arcname)
1131 drv, arcname = os.path.splitdrive(arcname)
1132 while arcname[0:1] == "/":
1133 arcname = arcname[1:]
1134
1135 # Now, fill the TarInfo object with
1136 # information specific for the file.
1137 tarinfo = TarInfo()
1138
1139 # Use os.stat or os.lstat, depending on platform
1140 # and if symlinks shall be resolved.
1141 if fileobj is None:
1142 if hasattr(os, "lstat") and not self.dereference:
1143 statres = os.lstat(name)
1144 else:
1145 statres = os.stat(name)
1146 else:
1147 statres = os.fstat(fileobj.fileno())
1148 linkname = ""
1149
1150 stmd = statres.st_mode
1151 if stat.S_ISREG(stmd):
1152 inode = (statres.st_ino, statres.st_dev)
1153 if inode in self.inodes and not self.dereference:
1154 # Is it a hardlink to an already
1155 # archived file?
1156 type = LNKTYPE
1157 linkname = self.inodes[inode]
1158 else:
1159 # The inode is added only if its valid.
1160 # For win32 it is always 0.
1161 type = REGTYPE
1162 if inode[0]:
1163 self.inodes[inode] = arcname
1164 elif stat.S_ISDIR(stmd):
1165 type = DIRTYPE
1166 if arcname[-1:] != "/":
1167 arcname += "/"
1168 elif stat.S_ISFIFO(stmd):
1169 type = FIFOTYPE
1170 elif stat.S_ISLNK(stmd):
1171 type = SYMTYPE
1172 linkname = os.readlink(name)
1173 elif stat.S_ISCHR(stmd):
1174 type = CHRTYPE
1175 elif stat.S_ISBLK(stmd):
1176 type = BLKTYPE
1177 else:
1178 return None
1179
1180 # Fill the TarInfo object with all
1181 # information we can get.
1182 tarinfo.name = arcname
1183 tarinfo.mode = stmd
1184 tarinfo.uid = statres.st_uid
1185 tarinfo.gid = statres.st_gid
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001186 if stat.S_ISDIR(stmd):
1187 # For a directory, the size must be 0
1188 tarinfo.size = 0
1189 else:
1190 tarinfo.size = statres.st_size
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001191 tarinfo.mtime = statres.st_mtime
1192 tarinfo.type = type
1193 tarinfo.linkname = linkname
1194 if pwd:
1195 try:
1196 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1197 except KeyError:
1198 pass
1199 if grp:
1200 try:
1201 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1202 except KeyError:
1203 pass
1204
1205 if type in (CHRTYPE, BLKTYPE):
1206 if hasattr(os, "major") and hasattr(os, "minor"):
1207 tarinfo.devmajor = os.major(statres.st_rdev)
1208 tarinfo.devminor = os.minor(statres.st_rdev)
1209 return tarinfo
1210
1211 def list(self, verbose=True):
1212 """Print a table of contents to sys.stdout. If `verbose' is False, only
1213 the names of the members are printed. If it is True, an `ls -l'-like
1214 output is produced.
1215 """
1216 self._check()
1217
1218 for tarinfo in self:
1219 if verbose:
1220 print filemode(tarinfo.mode),
1221 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1222 tarinfo.gname or tarinfo.gid),
1223 if tarinfo.ischr() or tarinfo.isblk():
1224 print "%10s" % ("%d,%d" \
1225 % (tarinfo.devmajor, tarinfo.devminor)),
1226 else:
1227 print "%10d" % tarinfo.size,
1228 print "%d-%02d-%02d %02d:%02d:%02d" \
1229 % time.localtime(tarinfo.mtime)[:6],
1230
1231 print tarinfo.name,
1232
1233 if verbose:
1234 if tarinfo.issym():
1235 print "->", tarinfo.linkname,
1236 if tarinfo.islnk():
1237 print "link to", tarinfo.linkname,
1238 print
1239
1240 def add(self, name, arcname=None, recursive=True):
1241 """Add the file `name' to the archive. `name' may be any type of file
1242 (directory, fifo, symbolic link, etc.). If given, `arcname'
1243 specifies an alternative name for the file in the archive.
1244 Directories are added recursively by default. This can be avoided by
1245 setting `recursive' to False.
1246 """
1247 self._check("aw")
1248
1249 if arcname is None:
1250 arcname = name
1251
1252 # Skip if somebody tries to archive the archive...
1253 if self.name is not None \
1254 and os.path.abspath(name) == os.path.abspath(self.name):
1255 self._dbg(2, "tarfile: Skipped %r" % name)
1256 return
1257
1258 # Special case: The user wants to add the current
1259 # working directory.
1260 if name == ".":
1261 if recursive:
1262 if arcname == ".":
1263 arcname = ""
1264 for f in os.listdir("."):
1265 self.add(f, os.path.join(arcname, f))
1266 return
1267
1268 self._dbg(1, name)
1269
1270 # Create a TarInfo object from the file.
1271 tarinfo = self.gettarinfo(name, arcname)
1272
1273 if tarinfo is None:
1274 self._dbg(1, "tarfile: Unsupported type %r" % name)
1275 return
1276
1277 # Append the tar header and data to the archive.
1278 if tarinfo.isreg():
1279 f = file(name, "rb")
1280 self.addfile(tarinfo, f)
1281 f.close()
1282
1283 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1284 tarinfo.size = 0L
1285 self.addfile(tarinfo)
1286
1287 if tarinfo.isdir():
1288 self.addfile(tarinfo)
1289 if recursive:
1290 for f in os.listdir(name):
1291 self.add(os.path.join(name, f), os.path.join(arcname, f))
1292
1293 def addfile(self, tarinfo, fileobj=None):
1294 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1295 given, tarinfo.size bytes are read from it and added to the archive.
1296 You can create TarInfo objects using gettarinfo().
1297 On Windows platforms, `fileobj' should always be opened with mode
1298 'rb' to avoid irritation about the file size.
1299 """
1300 self._check("aw")
1301
1302 tarinfo.name = normpath(tarinfo.name)
1303 if tarinfo.isdir():
1304 # directories should end with '/'
1305 tarinfo.name += "/"
1306
1307 if tarinfo.linkname:
1308 tarinfo.linkname = normpath(tarinfo.linkname)
1309
1310 if tarinfo.size > MAXSIZE_MEMBER:
Neal Norwitzd96d1012004-07-20 22:23:02 +00001311 if self.posix:
1312 raise ValueError, "file is too large (>= 8 GB)"
1313 else:
1314 self._dbg(2, "tarfile: Created GNU tar largefile header")
1315
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001316
1317 if len(tarinfo.linkname) > LENGTH_LINK:
1318 if self.posix:
1319 raise ValueError, "linkname is too long (>%d)" \
1320 % (LENGTH_LINK)
1321 else:
1322 self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
1323 tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
1324 self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")
1325
1326 if len(tarinfo.name) > LENGTH_NAME:
1327 if self.posix:
1328 prefix = tarinfo.name[:LENGTH_PREFIX + 1]
1329 while prefix and prefix[-1] != "/":
Tim Peters2c60f7a2003-01-29 03:49:43 +00001330 prefix = prefix[:-1]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001331
1332 name = tarinfo.name[len(prefix):]
1333 prefix = prefix[:-1]
1334
1335 if not prefix or len(name) > LENGTH_NAME:
1336 raise ValueError, "name is too long (>%d)" \
1337 % (LENGTH_NAME)
1338
1339 tarinfo.name = name
1340 tarinfo.prefix = prefix
1341 else:
1342 self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
1343 tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
1344 self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
1345
1346 self.fileobj.write(tarinfo.tobuf())
1347 self.offset += BLOCKSIZE
1348
1349 # If there's data to follow, append it.
1350 if fileobj is not None:
1351 copyfileobj(fileobj, self.fileobj, tarinfo.size)
1352 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1353 if remainder > 0:
1354 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1355 blocks += 1
1356 self.offset += blocks * BLOCKSIZE
1357
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001358 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001359
Martin v. Löwis00a73e72005-03-04 19:40:34 +00001360 def extractall(self, path=".", members=None):
1361 """Extract all members from the archive to the current working
1362 directory and set owner, modification time and permissions on
1363 directories afterwards. `path' specifies a different directory
1364 to extract to. `members' is optional and must be a subset of the
1365 list returned by getmembers().
1366 """
1367 directories = []
1368
1369 if members is None:
1370 members = self
1371
1372 for tarinfo in members:
1373 if tarinfo.isdir():
1374 # Extract directory with a safe mode, so that
1375 # all files below can be extracted as well.
1376 try:
1377 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1378 except EnvironmentError:
1379 pass
1380 directories.append(tarinfo)
1381 else:
1382 self.extract(tarinfo, path)
1383
1384 # Reverse sort directories.
1385 directories.sort(lambda a, b: cmp(a.name, b.name))
1386 directories.reverse()
1387
1388 # Set correct owner, mtime and filemode on directories.
1389 for tarinfo in directories:
1390 path = os.path.join(path, tarinfo.name)
1391 try:
1392 self.chown(tarinfo, path)
1393 self.utime(tarinfo, path)
1394 self.chmod(tarinfo, path)
1395 except ExtractError, e:
1396 if self.errorlevel > 1:
1397 raise
1398 else:
1399 self._dbg(1, "tarfile: %s" % e)
1400
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001401 def extract(self, member, path=""):
1402 """Extract a member from the archive to the current working directory,
1403 using its full name. Its file information is extracted as accurately
1404 as possible. `member' may be a filename or a TarInfo object. You can
1405 specify a different directory using `path'.
1406 """
1407 self._check("r")
1408
1409 if isinstance(member, TarInfo):
1410 tarinfo = member
1411 else:
1412 tarinfo = self.getmember(member)
1413
Neal Norwitza4f651a2004-07-20 22:07:44 +00001414 # Prepare the link target for makelink().
1415 if tarinfo.islnk():
1416 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1417
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001418 try:
1419 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1420 except EnvironmentError, e:
1421 if self.errorlevel > 0:
1422 raise
1423 else:
1424 if e.filename is None:
1425 self._dbg(1, "tarfile: %s" % e.strerror)
1426 else:
1427 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1428 except ExtractError, e:
1429 if self.errorlevel > 1:
1430 raise
1431 else:
1432 self._dbg(1, "tarfile: %s" % e)
1433
1434 def extractfile(self, member):
1435 """Extract a member from the archive as a file object. `member' may be
1436 a filename or a TarInfo object. If `member' is a regular file, a
1437 file-like object is returned. If `member' is a link, a file-like
1438 object is constructed from the link's target. If `member' is none of
1439 the above, None is returned.
1440 The file-like object is read-only and provides the following
1441 methods: read(), readline(), readlines(), seek() and tell()
1442 """
1443 self._check("r")
1444
1445 if isinstance(member, TarInfo):
1446 tarinfo = member
1447 else:
1448 tarinfo = self.getmember(member)
1449
1450 if tarinfo.isreg():
1451 return self.fileobject(self, tarinfo)
1452
1453 elif tarinfo.type not in SUPPORTED_TYPES:
1454 # If a member's type is unknown, it is treated as a
1455 # regular file.
1456 return self.fileobject(self, tarinfo)
1457
1458 elif tarinfo.islnk() or tarinfo.issym():
1459 if isinstance(self.fileobj, _Stream):
1460 # A small but ugly workaround for the case that someone tries
1461 # to extract a (sym)link as a file-object from a non-seekable
1462 # stream of tar blocks.
1463 raise StreamError, "cannot extract (sym)link as file object"
1464 else:
1465 # A (sym)link's file object is it's target's file object.
1466 return self.extractfile(self._getmember(tarinfo.linkname,
1467 tarinfo))
1468 else:
1469 # If there's no data associated with the member (directory, chrdev,
1470 # blkdev, etc.), return None instead of a file object.
1471 return None
1472
1473 def _extract_member(self, tarinfo, targetpath):
1474 """Extract the TarInfo object tarinfo to a physical
1475 file called targetpath.
1476 """
1477 # Fetch the TarInfo object for the given name
1478 # and build the destination pathname, replacing
1479 # forward slashes to platform specific separators.
1480 if targetpath[-1:] == "/":
1481 targetpath = targetpath[:-1]
1482 targetpath = os.path.normpath(targetpath)
1483
1484 # Create all upper directories.
1485 upperdirs = os.path.dirname(targetpath)
1486 if upperdirs and not os.path.exists(upperdirs):
1487 ti = TarInfo()
1488 ti.name = upperdirs
1489 ti.type = DIRTYPE
1490 ti.mode = 0777
1491 ti.mtime = tarinfo.mtime
1492 ti.uid = tarinfo.uid
1493 ti.gid = tarinfo.gid
1494 ti.uname = tarinfo.uname
1495 ti.gname = tarinfo.gname
1496 try:
1497 self._extract_member(ti, ti.name)
1498 except:
1499 pass
1500
1501 if tarinfo.islnk() or tarinfo.issym():
1502 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1503 else:
1504 self._dbg(1, tarinfo.name)
1505
1506 if tarinfo.isreg():
1507 self.makefile(tarinfo, targetpath)
1508 elif tarinfo.isdir():
1509 self.makedir(tarinfo, targetpath)
1510 elif tarinfo.isfifo():
1511 self.makefifo(tarinfo, targetpath)
1512 elif tarinfo.ischr() or tarinfo.isblk():
1513 self.makedev(tarinfo, targetpath)
1514 elif tarinfo.islnk() or tarinfo.issym():
1515 self.makelink(tarinfo, targetpath)
1516 elif tarinfo.type not in SUPPORTED_TYPES:
1517 self.makeunknown(tarinfo, targetpath)
1518 else:
1519 self.makefile(tarinfo, targetpath)
1520
1521 self.chown(tarinfo, targetpath)
1522 if not tarinfo.issym():
1523 self.chmod(tarinfo, targetpath)
1524 self.utime(tarinfo, targetpath)
1525
1526 #--------------------------------------------------------------------------
1527 # Below are the different file methods. They are called via
1528 # _extract_member() when extract() is called. They can be replaced in a
1529 # subclass to implement other functionality.
1530
1531 def makedir(self, tarinfo, targetpath):
1532 """Make a directory called targetpath.
1533 """
1534 try:
1535 os.mkdir(targetpath)
1536 except EnvironmentError, e:
1537 if e.errno != errno.EEXIST:
1538 raise
1539
1540 def makefile(self, tarinfo, targetpath):
1541 """Make a file called targetpath.
1542 """
1543 source = self.extractfile(tarinfo)
1544 target = file(targetpath, "wb")
1545 copyfileobj(source, target)
1546 source.close()
1547 target.close()
1548
1549 def makeunknown(self, tarinfo, targetpath):
1550 """Make a file from a TarInfo object with an unknown type
1551 at targetpath.
1552 """
1553 self.makefile(tarinfo, targetpath)
1554 self._dbg(1, "tarfile: Unknown file type %r, " \
1555 "extracted as regular file." % tarinfo.type)
1556
1557 def makefifo(self, tarinfo, targetpath):
1558 """Make a fifo called targetpath.
1559 """
1560 if hasattr(os, "mkfifo"):
1561 os.mkfifo(targetpath)
1562 else:
1563 raise ExtractError, "fifo not supported by system"
1564
1565 def makedev(self, tarinfo, targetpath):
1566 """Make a character or block device called targetpath.
1567 """
1568 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
1569 raise ExtractError, "special devices not supported by system"
1570
1571 mode = tarinfo.mode
1572 if tarinfo.isblk():
1573 mode |= stat.S_IFBLK
1574 else:
1575 mode |= stat.S_IFCHR
1576
1577 os.mknod(targetpath, mode,
1578 os.makedev(tarinfo.devmajor, tarinfo.devminor))
1579
1580 def makelink(self, tarinfo, targetpath):
1581 """Make a (symbolic) link called targetpath. If it cannot be created
1582 (platform limitation), we try to make a copy of the referenced file
1583 instead of a link.
1584 """
1585 linkpath = tarinfo.linkname
1586 try:
1587 if tarinfo.issym():
1588 os.symlink(linkpath, targetpath)
1589 else:
Neal Norwitza4f651a2004-07-20 22:07:44 +00001590 # See extract().
1591 os.link(tarinfo._link_target, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001592 except AttributeError:
1593 if tarinfo.issym():
1594 linkpath = os.path.join(os.path.dirname(tarinfo.name),
1595 linkpath)
1596 linkpath = normpath(linkpath)
1597
1598 try:
1599 self._extract_member(self.getmember(linkpath), targetpath)
1600 except (EnvironmentError, KeyError), e:
1601 linkpath = os.path.normpath(linkpath)
1602 try:
1603 shutil.copy2(linkpath, targetpath)
1604 except EnvironmentError, e:
1605 raise IOError, "link could not be created"
1606
1607 def chown(self, tarinfo, targetpath):
1608 """Set owner of targetpath according to tarinfo.
1609 """
1610 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1611 # We have to be root to do so.
1612 try:
1613 g = grp.getgrnam(tarinfo.gname)[2]
1614 except KeyError:
1615 try:
1616 g = grp.getgrgid(tarinfo.gid)[2]
1617 except KeyError:
1618 g = os.getgid()
1619 try:
1620 u = pwd.getpwnam(tarinfo.uname)[2]
1621 except KeyError:
1622 try:
1623 u = pwd.getpwuid(tarinfo.uid)[2]
1624 except KeyError:
1625 u = os.getuid()
1626 try:
1627 if tarinfo.issym() and hasattr(os, "lchown"):
1628 os.lchown(targetpath, u, g)
1629 else:
Andrew MacIntyre7970d202003-02-19 12:51:34 +00001630 if sys.platform != "os2emx":
1631 os.chown(targetpath, u, g)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001632 except EnvironmentError, e:
1633 raise ExtractError, "could not change owner"
1634
1635 def chmod(self, tarinfo, targetpath):
1636 """Set file permissions of targetpath according to tarinfo.
1637 """
Jack Jansen834eff62003-03-07 12:47:06 +00001638 if hasattr(os, 'chmod'):
1639 try:
1640 os.chmod(targetpath, tarinfo.mode)
1641 except EnvironmentError, e:
1642 raise ExtractError, "could not change mode"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001643
1644 def utime(self, tarinfo, targetpath):
1645 """Set modification time of targetpath according to tarinfo.
1646 """
Jack Jansen834eff62003-03-07 12:47:06 +00001647 if not hasattr(os, 'utime'):
Tim Petersf9347782003-03-07 15:36:41 +00001648 return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001649 if sys.platform == "win32" and tarinfo.isdir():
1650 # According to msdn.microsoft.com, it is an error (EACCES)
1651 # to use utime() on directories.
1652 return
1653 try:
1654 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1655 except EnvironmentError, e:
1656 raise ExtractError, "could not change modification time"
1657
1658 #--------------------------------------------------------------------------
1659
1660 def next(self):
1661 """Return the next member of the archive as a TarInfo object, when
1662 TarFile is opened for reading. Return None if there is no more
1663 available.
1664 """
1665 self._check("ra")
1666 if self.firstmember is not None:
1667 m = self.firstmember
1668 self.firstmember = None
1669 return m
1670
1671 # Read the next block.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001672 self.fileobj.seek(self.offset)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001673 while True:
1674 buf = self.fileobj.read(BLOCKSIZE)
1675 if not buf:
1676 return None
1677 try:
1678 tarinfo = TarInfo.frombuf(buf)
1679 except ValueError:
1680 if self.ignore_zeros:
1681 if buf.count(NUL) == BLOCKSIZE:
1682 adj = "empty"
1683 else:
1684 adj = "invalid"
1685 self._dbg(2, "0x%X: %s block" % (self.offset, adj))
1686 self.offset += BLOCKSIZE
1687 continue
1688 else:
1689 # Block is empty or unreadable.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001690 if self.offset == 0:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001691 # If the first block is invalid. That does not
1692 # look like a tar archive we can handle.
1693 raise ReadError,"empty, unreadable or compressed file"
1694 return None
1695 break
1696
1697 # We shouldn't rely on this checksum, because some tar programs
1698 # calculate it differently and it is merely validating the
1699 # header block. We could just as well skip this part, which would
1700 # have a slight effect on performance...
1701 if tarinfo.chksum != calc_chksum(buf):
1702 self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
1703
1704 # Set the TarInfo object's offset to the current position of the
1705 # TarFile and set self.offset to the position where the data blocks
1706 # should begin.
1707 tarinfo.offset = self.offset
1708 self.offset += BLOCKSIZE
1709
1710 # Check if the TarInfo object has a typeflag for which a callback
1711 # method is registered in the TYPE_METH. If so, then call it.
1712 if tarinfo.type in self.TYPE_METH:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001713 return self.TYPE_METH[tarinfo.type](self, tarinfo)
1714
1715 tarinfo.offset_data = self.offset
1716 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
1717 # Skip the following data blocks.
1718 self.offset += self._block(tarinfo.size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001719
1720 if tarinfo.isreg() and tarinfo.name[:-1] == "/":
1721 # some old tar programs don't know DIRTYPE
1722 tarinfo.type = DIRTYPE
1723
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001724 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001725 return tarinfo
1726
1727 #--------------------------------------------------------------------------
1728 # Below are some methods which are called for special typeflags in the
1729 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1730 # are registered in TYPE_METH below. You can register your own methods
1731 # with this mapping.
1732 # A registered method is called with a TarInfo object as only argument.
1733 #
1734 # During its execution the method MUST perform the following tasks:
1735 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1736 # if there is data to follow.
1737 # 2. set self.offset to the position where the next member's header will
1738 # begin.
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001739 # 3. append the tarinfo object to self.members, if it is supposed to appear
1740 # as a member of the TarFile object.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001741 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001742
1743 def proc_gnulong(self, tarinfo):
1744 """Evaluate the blocks that hold a GNU longname
1745 or longlink member.
1746 """
1747 buf = ""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001748 count = tarinfo.size
1749 while count > 0:
1750 block = self.fileobj.read(BLOCKSIZE)
1751 buf += block
1752 self.offset += BLOCKSIZE
1753 count -= BLOCKSIZE
1754
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001755 # Fetch the next header
1756 next = self.next()
1757
1758 next.offset = tarinfo.offset
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001759 if tarinfo.type == GNUTYPE_LONGNAME:
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001760 next.name = nts(buf)
1761 elif tarinfo.type == GNUTYPE_LONGLINK:
1762 next.linkname = nts(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001763
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001764 return next
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001765
1766 def proc_sparse(self, tarinfo):
1767 """Analyze a GNU sparse header plus extra headers.
1768 """
1769 buf = tarinfo.tobuf()
1770 sp = _ringbuffer()
1771 pos = 386
1772 lastpos = 0L
1773 realpos = 0L
1774 # There are 4 possible sparse structs in the
1775 # first header.
1776 for i in xrange(4):
1777 try:
1778 offset = int(buf[pos:pos + 12], 8)
1779 numbytes = int(buf[pos + 12:pos + 24], 8)
1780 except ValueError:
1781 break
1782 if offset > lastpos:
1783 sp.append(_hole(lastpos, offset - lastpos))
1784 sp.append(_data(offset, numbytes, realpos))
1785 realpos += numbytes
1786 lastpos = offset + numbytes
1787 pos += 24
1788
1789 isextended = ord(buf[482])
1790 origsize = int(buf[483:495], 8)
1791
1792 # If the isextended flag is given,
1793 # there are extra headers to process.
1794 while isextended == 1:
1795 buf = self.fileobj.read(BLOCKSIZE)
1796 self.offset += BLOCKSIZE
1797 pos = 0
1798 for i in xrange(21):
1799 try:
1800 offset = int(buf[pos:pos + 12], 8)
1801 numbytes = int(buf[pos + 12:pos + 24], 8)
1802 except ValueError:
1803 break
1804 if offset > lastpos:
1805 sp.append(_hole(lastpos, offset - lastpos))
1806 sp.append(_data(offset, numbytes, realpos))
1807 realpos += numbytes
1808 lastpos = offset + numbytes
1809 pos += 24
1810 isextended = ord(buf[504])
1811
1812 if lastpos < origsize:
1813 sp.append(_hole(lastpos, origsize - lastpos))
1814
1815 tarinfo.sparse = sp
1816
1817 tarinfo.offset_data = self.offset
1818 self.offset += self._block(tarinfo.size)
1819 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001820
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001821 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001822 return tarinfo
1823
1824 # The type mapping for the next() method. The keys are single character
1825 # strings, the typeflag. The values are methods which are called when
1826 # next() encounters such a typeflag.
1827 TYPE_METH = {
1828 GNUTYPE_LONGNAME: proc_gnulong,
1829 GNUTYPE_LONGLINK: proc_gnulong,
1830 GNUTYPE_SPARSE: proc_sparse
1831 }
1832
1833 #--------------------------------------------------------------------------
1834 # Little helper methods:
1835
1836 def _block(self, count):
1837 """Round up a byte count by BLOCKSIZE and return it,
1838 e.g. _block(834) => 1024.
1839 """
1840 blocks, remainder = divmod(count, BLOCKSIZE)
1841 if remainder:
1842 blocks += 1
1843 return blocks * BLOCKSIZE
1844
1845 def _getmember(self, name, tarinfo=None):
1846 """Find an archive member by name from bottom to top.
1847 If tarinfo is given, it is used as the starting point.
1848 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001849 # Ensure that all members have been loaded.
1850 members = self.getmembers()
1851
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001852 if tarinfo is None:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001853 end = len(members)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001854 else:
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001855 end = members.index(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001856
1857 for i in xrange(end - 1, -1, -1):
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001858 if name == members[i].name:
1859 return members[i]
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001860
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001861 def _load(self):
1862 """Read through the entire archive file and look for readable
1863 members.
1864 """
1865 while True:
1866 tarinfo = self.next()
1867 if tarinfo is None:
1868 break
1869 self._loaded = True
1870
1871 def _check(self, mode=None):
1872 """Check if TarFile is still open, and if the operation's mode
1873 corresponds to TarFile's mode.
1874 """
1875 if self.closed:
1876 raise IOError, "%s is closed" % self.__class__.__name__
1877 if mode is not None and self._mode not in mode:
1878 raise IOError, "bad operation for mode %r" % self._mode
1879
1880 def __iter__(self):
1881 """Provide an iterator object.
1882 """
1883 if self._loaded:
1884 return iter(self.members)
1885 else:
1886 return TarIter(self)
1887
1888 def _create_gnulong(self, name, type):
1889 """Write a GNU longname/longlink member to the TarFile.
1890 It consists of an extended tar header, with the length
1891 of the longname as size, followed by data blocks,
1892 which contain the longname as a null terminated string.
1893 """
Neal Norwitz0662f8a2004-07-20 21:54:18 +00001894 name += NUL
1895
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001896 tarinfo = TarInfo()
1897 tarinfo.name = "././@LongLink"
1898 tarinfo.type = type
1899 tarinfo.mode = 0
1900 tarinfo.size = len(name)
1901
1902 # write extended header
1903 self.fileobj.write(tarinfo.tobuf())
Neal Norwitz0662f8a2004-07-20 21:54:18 +00001904 self.offset += BLOCKSIZE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001905 # write name blocks
1906 self.fileobj.write(name)
1907 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1908 if remainder > 0:
1909 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1910 blocks += 1
1911 self.offset += blocks * BLOCKSIZE
1912
1913 def _dbg(self, level, msg):
1914 """Write debugging output to sys.stderr.
1915 """
1916 if level <= self.debug:
1917 print >> sys.stderr, msg
1918# class TarFile
1919
1920class TarIter:
1921 """Iterator Class.
1922
1923 for tarinfo in TarFile(...):
1924 suite...
1925 """
1926
1927 def __init__(self, tarfile):
1928 """Construct a TarIter object.
1929 """
1930 self.tarfile = tarfile
Martin v. Löwis637431b2005-03-03 23:12:42 +00001931 self.index = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001932 def __iter__(self):
1933 """Return iterator object.
1934 """
1935 return self
1936 def next(self):
1937 """Return the next item using TarFile's next() method.
1938 When all members have been read, set TarFile as _loaded.
1939 """
Martin v. Löwis637431b2005-03-03 23:12:42 +00001940 # Fix for SF #1100429: Under rare circumstances it can
1941 # happen that getmembers() is called during iteration,
1942 # which will cause TarIter to stop prematurely.
1943 if not self.tarfile._loaded:
1944 tarinfo = self.tarfile.next()
1945 if not tarinfo:
1946 self.tarfile._loaded = True
1947 raise StopIteration
1948 else:
1949 try:
1950 tarinfo = self.tarfile.members[self.index]
1951 except IndexError:
1952 raise StopIteration
1953 self.index += 1
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001954 return tarinfo
1955
1956# Helper classes for sparse file support
1957class _section:
1958 """Base class for _data and _hole.
1959 """
1960 def __init__(self, offset, size):
1961 self.offset = offset
1962 self.size = size
1963 def __contains__(self, offset):
1964 return self.offset <= offset < self.offset + self.size
1965
1966class _data(_section):
1967 """Represent a data section in a sparse file.
1968 """
1969 def __init__(self, offset, size, realpos):
1970 _section.__init__(self, offset, size)
1971 self.realpos = realpos
1972
1973class _hole(_section):
1974 """Represent a hole section in a sparse file.
1975 """
1976 pass
1977
1978class _ringbuffer(list):
1979 """Ringbuffer class which increases performance
1980 over a regular list.
1981 """
1982 def __init__(self):
1983 self.idx = 0
1984 def find(self, offset):
1985 idx = self.idx
1986 while True:
1987 item = self[idx]
1988 if offset in item:
1989 break
1990 idx += 1
1991 if idx == len(self):
1992 idx = 0
1993 if idx == self.idx:
1994 # End of File
1995 return None
1996 self.idx = idx
1997 return item
1998
1999#---------------------------------------------
2000# zipfile compatible TarFile class
2001#---------------------------------------------
2002TAR_PLAIN = 0 # zipfile.ZIP_STORED
2003TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
2004class TarFileCompat:
2005 """TarFile class compatible with standard module zipfile's
2006 ZipFile class.
2007 """
2008 def __init__(self, file, mode="r", compression=TAR_PLAIN):
2009 if compression == TAR_PLAIN:
2010 self.tarfile = TarFile.taropen(file, mode)
2011 elif compression == TAR_GZIPPED:
2012 self.tarfile = TarFile.gzopen(file, mode)
2013 else:
2014 raise ValueError, "unknown compression constant"
2015 if mode[0:1] == "r":
2016 members = self.tarfile.getmembers()
2017 for i in xrange(len(members)):
2018 m = members[i]
2019 m.filename = m.name
2020 m.file_size = m.size
2021 m.date_time = time.gmtime(m.mtime)[:6]
2022 def namelist(self):
2023 return map(lambda m: m.name, self.infolist())
2024 def infolist(self):
2025 return filter(lambda m: m.type in REGULAR_TYPES,
2026 self.tarfile.getmembers())
2027 def printdir(self):
2028 self.tarfile.list()
2029 def testzip(self):
2030 return
2031 def getinfo(self, name):
2032 return self.tarfile.getmember(name)
2033 def read(self, name):
2034 return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2035 def write(self, filename, arcname=None, compress_type=None):
2036 self.tarfile.add(filename, arcname)
2037 def writestr(self, zinfo, bytes):
Raymond Hettingera6172712004-12-31 19:15:26 +00002038 try:
2039 from cStringIO import StringIO
2040 except ImportError:
2041 from StringIO import StringIO
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002042 import calendar
2043 zinfo.name = zinfo.filename
2044 zinfo.size = zinfo.file_size
2045 zinfo.mtime = calendar.timegm(zinfo.date_time)
Raymond Hettingera6172712004-12-31 19:15:26 +00002046 self.tarfile.addfile(zinfo, StringIO(bytes))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002047 def close(self):
2048 self.tarfile.close()
2049#class TarFileCompat
2050
2051#--------------------
2052# exported functions
2053#--------------------
2054def is_tarfile(name):
2055 """Return True if name points to a tar archive that we
2056 are able to handle, else return False.
2057 """
2058 try:
2059 t = open(name)
2060 t.close()
2061 return True
2062 except TarError:
2063 return False
2064
2065open = TarFile.open