blob: ff9f51f2651f67871e183b17aaa09cff975c39a0 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
# The "$...$" values below look like version-control keyword placeholders
# that are left unexpanded in this copy -- TODO confirm.
__version__ = "$Revision$"
# $Source$

version = "0.6.4"               # module version string
__author__ = "Lars Gustäbel (lars@gustaebel.de)"
__date__ = "$Date$"
__cvsid__ = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
# This module needs work for MacOS9, especially in the area of pathname
# handling. In many places it is assumed that a simple substitution of "/"
# by the local os.path.sep is good enough to convert pathnames, but this
# does not work with the mac rooted:path:name versus :nonrooted:path:name
# syntax, so importing the module is refused on that platform.
if sys.platform == 'mac':
    raise ImportError("tarfile does not work for platform==mac")

# grp and pwd are optional: when either import fails, both names are
# bound to None.
try:
    import grp
    import pwd
except ImportError:
    grp = pwd = None
64
# Names made available by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20         # length of records (20 blocks)
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L      # maximum size of a file (11 octal digits)

# One-character values stored in the type field of a member header.
REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file (type field holds a NUL)
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file
95
96#---------------------------------------------------------
97# tarfile constants
98#---------------------------------------------------------
# Every member type flag this module knows how to handle: the standard
# type flags plus the three GNU extensions.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# Type flags for which TarInfo.isreg() returns true.
REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
107
108#---------------------------------------------------------
109# Bits used in the mode field, values in octal.
110#---------------------------------------------------------
# File type bits; filemode() uses them to pick the first character of the
# mode string.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID = 04000            # set UID on execution
TSGID = 02000            # set GID on execution
TSVTX = 01000            # reserved

# Read/write/execute permission bits for owner, group and other.
TUREAD = 0400            # read by owner
TUWRITE = 0200           # write by owner
TUEXEC = 0100            # execute/search by owner
TGREAD = 0040            # read by group
TGWRITE = 0020           # write by group
TGEXEC = 0010            # execute/search by group
TOREAD = 0004            # read by other
TOWRITE = 0002           # write by other
TOEXEC = 0001            # execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    Only the characters in front of the first NUL are returned. Whatever
    follows the terminator (padding or stale bytes) is discarded. A buffer
    that contains no NUL at all is returned unchanged.
    """
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Calculate the checksum for a member's header.

    The checksum is the plain sum of all byte values of the header, with
    the eight bytes of the chksum field itself (offsets 148..155) counted
    as if they were blanks. buf is a 512 byte long string buffer which
    holds the header.
    """
    blanks = 8 * ord(" ")                   # stand-in for the chksum field
    before = sum(map(ord, buf[:148]))       # everything in front of chksum
    after = sum(map(ord, buf[156:]))        # everything behind chksum
    return blanks + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire remaining content of src is copied.
    If src runs out of data before length bytes were copied, IOError is
    raised; the short chunk is not written to dst.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    # Transfer the whole BUFSIZE-sized chunks first ...
    copied = 0
    while copied < full_chunks:
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
        copied += 1

    # ... then whatever is left over.
    if tail != 0:
        chunk = src.read(tail)
        if len(chunk) < tail:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
175
# Lookup table for filemode(). Each inner tuple describes one character of
# the mode string as a flat sequence of (bits, character) pairs. The pairs
# are tried from left to right and the first one whose bits are all set in
# the mode wins; "-" is used when none matches.
#
# Order matters: a combined entry such as TUEXEC|TSUID has to come before
# the single bits it is made of, otherwise the plain "x" would always win
# and "s"/"t" could never be produced.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))
192
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

    Every entry of filemode_table contributes exactly one character: the
    character paired with the first bit pattern that is completely set in
    mode, or "-" if none of the entry's patterns matches.
    """
    chars = []
    for entry in filemode_table:
        # entry is a flat sequence: bits, char, bits, char, ...
        for i in range(0, len(entry), 2):
            bits = entry[i]
            if mode & bits == bits:
                chars.append(entry[i + 1])
                break
        else:
            chars.append("-")
    return "".join(chars)
210
# normpath() normalizes a path and always returns it with forward slashes.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and replace the local separator by "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
259class _Stream:
260 """Class that serves as an adapter between TarFile and
261 a stream-like object. The stream-like object only
262 needs to have a read() or write() method and is accessed
263 blockwise. Use of gzip or bzip2 compression is possible.
264 A stream-like object could be for example: sys.stdin,
265 sys.stdout, a socket, a tape device etc.
266
267 _Stream is intended to be used only internally.
268 """
269
270 def __init__(self, name, mode, type, fileobj, bufsize):
271 """Construct a _Stream object.
272 """
273 self._extfileobj = True
274 if fileobj is None:
275 fileobj = _LowLevelFile(name, mode)
276 self._extfileobj = False
277
278 self.name = name or ""
279 self.mode = mode
280 self.type = type
281 self.fileobj = fileobj
282 self.bufsize = bufsize
283 self.buf = ""
284 self.pos = 0L
285 self.closed = False
286
287 if type == "gz":
288 try:
289 import zlib
290 except ImportError:
291 raise CompressionError, "zlib module is not available"
292 self.zlib = zlib
293 self.crc = zlib.crc32("")
294 if mode == "r":
295 self._init_read_gz()
296 else:
297 self._init_write_gz()
298
299 if type == "bz2":
300 try:
301 import bz2
302 except ImportError:
303 raise CompressionError, "bz2 module is not available"
304 if mode == "r":
305 self.dbuf = ""
306 self.cmp = bz2.BZ2Decompressor()
307 else:
308 self.cmp = bz2.BZ2Compressor()
309
310 def __del__(self):
311 if not self.closed:
312 self.close()
313
314 def _init_write_gz(self):
315 """Initialize for writing with gzip compression.
316 """
317 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
318 -self.zlib.MAX_WBITS,
319 self.zlib.DEF_MEM_LEVEL,
320 0)
321 timestamp = struct.pack("<L", long(time.time()))
322 self.__write("\037\213\010\010%s\002\377" % timestamp)
323 if self.name.endswith(".gz"):
324 self.name = self.name[:-3]
325 self.__write(self.name + NUL)
326
327 def write(self, s):
328 """Write string s to the stream.
329 """
330 if self.type == "gz":
331 self.crc = self.zlib.crc32(s, self.crc)
332 self.pos += len(s)
333 if self.type != "tar":
334 s = self.cmp.compress(s)
335 self.__write(s)
336
337 def __write(self, s):
338 """Write string s to the stream if a whole new block
339 is ready to be written.
340 """
341 self.buf += s
342 while len(self.buf) > self.bufsize:
343 self.fileobj.write(self.buf[:self.bufsize])
344 self.buf = self.buf[self.bufsize:]
345
346 def close(self):
347 """Close the _Stream object. No operation should be
348 done on it afterwards.
349 """
350 if self.closed:
351 return
352
353 if self.mode == "w" and self.buf:
354 if self.type != "tar":
355 self.buf += self.cmp.flush()
356 self.fileobj.write(self.buf)
357 self.buf = ""
358 if self.type == "gz":
359 self.fileobj.write(struct.pack("<l", self.crc))
Andrew M. Kuchling10a44492003-10-24 17:38:34 +0000360 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000361
362 if not self._extfileobj:
363 self.fileobj.close()
364
365 self.closed = True
366
367 def _init_read_gz(self):
368 """Initialize for reading a gzip compressed fileobj.
369 """
370 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
371 self.dbuf = ""
372
373 # taken from gzip.GzipFile with some alterations
374 if self.__read(2) != "\037\213":
375 raise ReadError, "not a gzip file"
376 if self.__read(1) != "\010":
377 raise CompressionError, "unsupported compression method"
378
379 flag = ord(self.__read(1))
380 self.__read(6)
381
382 if flag & 4:
383 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
384 self.read(xlen)
385 if flag & 8:
386 while True:
387 s = self.__read(1)
388 if not s or s == NUL:
389 break
390 if flag & 16:
391 while True:
392 s = self.__read(1)
393 if not s or s == NUL:
394 break
395 if flag & 2:
396 self.__read(2)
397
398 def tell(self):
399 """Return the stream's file pointer position.
400 """
401 return self.pos
402
403 def seek(self, pos=0):
404 """Set the stream's file pointer to pos. Negative seeking
405 is forbidden.
406 """
407 if pos - self.pos >= 0:
408 blocks, remainder = divmod(pos - self.pos, self.bufsize)
409 for i in xrange(blocks):
410 self.read(self.bufsize)
411 self.read(remainder)
412 else:
413 raise StreamError, "seeking backwards is not allowed"
414 return self.pos
415
416 def read(self, size=None):
417 """Return the next size number of bytes from the stream.
418 If size is not defined, return all bytes of the stream
419 up to EOF.
420 """
421 if size is None:
422 t = []
423 while True:
424 buf = self._read(self.bufsize)
425 if not buf:
426 break
427 t.append(buf)
428 buf = "".join(t)
429 else:
430 buf = self._read(size)
431 self.pos += len(buf)
432 return buf
433
434 def _read(self, size):
435 """Return size bytes from the stream.
436 """
437 if self.type == "tar":
438 return self.__read(size)
439
440 c = len(self.dbuf)
441 t = [self.dbuf]
442 while c < size:
443 buf = self.__read(self.bufsize)
444 if not buf:
445 break
446 buf = self.cmp.decompress(buf)
447 t.append(buf)
448 c += len(buf)
449 t = "".join(t)
450 self.dbuf = t[size:]
451 return t[:size]
452
453 def __read(self, size):
454 """Return size bytes from stream. If internal buffer is empty,
455 read another block from the stream.
456 """
457 c = len(self.buf)
458 t = [self.buf]
459 while c < size:
460 buf = self.fileobj.read(self.bufsize)
461 if not buf:
462 break
463 t.append(buf)
464 c += len(buf)
465 t = "".join(t)
466 self.buf = t[size:]
467 return t[:size]
468# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """tarfile supplies the file object the archive is read from
           (tarfile.fileobj); tarinfo supplies name, size and data offset
           of the member.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        # read() is bound per instance, depending on the kind of member.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Read size bytes from the archive's file object at its
           current position.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           A trailing "\\r\\n" is returned as "\\n". If the buffered line
           is longer than size, its first size characters are returned
           (without a newline) and the rest is kept for the next call.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl > size:
            # Do not cut the line at size and pretend it ended there;
            # that would drop a character and invent a newline.
            chunk = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return chunk

        if nl < 0:
            # No complete line buffered yet: read more data in small
            # pieces until a newline shows up or the size budget is used
            # up. The budget check also keeps read() from ever being
            # called with a negative size.
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                s = self.linebuffer
                self.linebuffer = ""
                return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files. If size is None or
           negative, everything up to the end of the member is read.
           Raises ValueError if the file object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None or size < 0:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files. If size is None or
           negative, everything up to the end of the member is read.
           Raises ValueError if the file object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None or size < 0:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # Never read across the end of the current section.
        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Section with real data: translate the position within the
            # member into the position within the stored data.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Any other section is not stored in the archive; it reads
            # as NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file. whence is 0 (absolute),
           1 (relative to the current position) or 2 (relative to the
           end). The resulting position is clamped to the range
           0..size; the line buffer of readline() is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
615
616#------------------
617# Exported Classes
618#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name            # member name (dirnames must end with '/')
        # file permissions: rw-rw-rw- (octal 666)
        self.mode = TUREAD | TUWRITE | TGREAD | TGWRITE | TOREAD | TOWRITE
        self.uid = 0                # user id
        self.gid = 0                # group id
        self.size = 0               # file size
        self.mtime = 0              # modification time
        self.chksum = 0             # header checksum
        self.type = REGTYPE         # member type
        self.linkname = ""          # link name
        self.uname = "user"         # user name
        self.gname = "group"        # group name
        self.devmajor = 0           #-
        self.devminor = 0           #-for use with CHRTYPE and BLKTYPE
        self.prefix = ""            # prefix to filename or holding information
                                    # about sparse files

        self.offset = 0             # the tar header starts here
        self.offset_data = 0        # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.name,
                                   id(self))

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.
           Raises ValueError if one of the numeric fields (other than
           the device numbers) does not hold an octal number.
        """
        tarinfo = cls()
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = int(buf[100:108], 8)
        tarinfo.uid = int(buf[108:116], 8)
        tarinfo.gid = int(buf[116:124], 8)

        # There are two possible codings for the size field we
        # have to discriminate, see comment in tobuf() below.
        if buf[124] != "\200":
            tarinfo.size = long(buf[124:136], 8)
        else:
            # Marker byte followed by 11 bytes holding the size as a
            # big-endian binary number.
            size = long(0)
            for char in buf[125:136]:
                size = (size << 8) + ord(char)
            tarinfo.size = size

        tarinfo.mtime = long(buf[136:148], 8)
        tarinfo.chksum = int(buf[148:156], 8)
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        try:
            tarinfo.devmajor = int(buf[329:337], 8)
            tarinfo.devminor = int(buf[337:345], 8)
        except ValueError:
            # Unparsable device numbers: reset both of them.
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        # (For sparse members the prefix field holds other information,
        # see __init__, and must not be treated as part of the name.)
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix),
                                                 tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.
           The block is also stored in self.buf.
        """
        # Prefer the size to be encoded as 11 octal ascii digits
        # which is the most portable. If the size exceeds this
        # limit (>= 8 GB), encode it as an 88-bit value which is
        # a GNU tar feature.
        if self.size <= MAXSIZE_MEMBER:
            size = "%011o" % self.size
        else:
            remaining = self.size
            digits = []
            for i in range(11):
                digits.insert(0, chr(remaining & 0xFF))
                remaining >>= 8
            size = "\200" + "".join(digits)

        # The following code was contributed by Detlef Lannert.
        # Each field is cut to its size or padded with NULs.
        parts = []
        for value, fieldsize in (
                (self.name, 100),
                ("%07o" % (self.mode & 0xFFF), 8),  # 0xFFF == octal 7777
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                (size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),    # chksum, counted as blanks for now
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            padding = (fieldsize - len(value)) * NUL
            parts.append(value[:fieldsize] + padding)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # chksum field: six octal digits, a NUL and the last blank
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    # Type predicates, all based on the member's type field.
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
767
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # The attributes below are class-level defaults.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    posix = False               # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)

    fileobject = ExFileObject   # file-like class for archive members
                                # (see ExFileObject)
788
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       Raises ValueError for any other mode. If reading the archive
       fails, a file that was opened here is closed again before the
       exception is passed on.
    """
    self.name = name

    # Compare against the exact mode strings: a substring test would let
    # "" through, which then fails with a KeyError below.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(self.name, self.mode)
        self._extfileobj = False
    else:
        if self.name is None and hasattr(fileobj, "name"):
            self.name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.fileobj = fileobj

    # Init datastructures
    self.closed = False
    self.members = []           # list of members as TarInfo objects
    self.membernames = []       # names of members
    self._loaded = False        # flag if all members have been read
    self.offset = 0             # current position in the archive file
    self.inodes = {}            # dictionary caching the inodes of
                                # archive members already added

    try:
        if self._mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self._mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                try:
                    tarinfo = self.next()
                except ReadError:
                    self.fileobj.seek(0)
                    break
                if tarinfo is None:
                    self.fileobj.seek(- BLOCKSIZE, 1)
                    break
    except:
        # Cleanup only, the exception is re-raised unchanged: do not
        # leave a file behind that was opened above.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise

    if self._mode in "aw":
        self._loaded = True
845
846 #--------------------------------------------------------------------------
847 # Below are the classmethods which act as alternate constructors to the
848 # TarFile class. The open() method is the only one that is needed for
849 # public use; it is the "super"-constructor and is able to select an
850 # adequate "sub"-constructor for a particular compression using the mapping
851 # from OPEN_METH.
852 #
853 # This concept allows one to subclass TarFile without losing the comfort of
854 # the super-constructor. A sub-constructor is registered and made available
855 # by adding it to the mapping in OPEN_METH.
856
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r'          open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       bufsize is the block size used for the stream modes ('|').

       Raises ValueError if neither name nor fileobj is given or if mode
       is invalid, CompressionError for an unknown compression type and
       ReadError if no opening method can read the file in mode 'r'.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        func = getattr(cls, cls.OPEN_METH[comptype])
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: a substring test against "rw" would also
        # accept "rw" itself.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # The _Stream was created here, so the TarFile has to close it.
        t._extfileobj = False
        return t

    elif mode == "r":
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                continue
        raise ReadError("file could not be opened successfully")

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")

open = classmethod(open)
923
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.
       mode is 'r', 'a' or 'w'; anything else raises ValueError.
    """
    # Exact comparison: a substring test against "raw" would also accept
    # the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)

taropen = classmethod(taropen)
932
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       name may be None if fileobj is given. Raises ValueError for an
       invalid mode, CompressionError if the gzip module is not usable
       and ReadError if the archive cannot be read as a gzip file.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the tar archive inside the gzip file:
    # foo.tgz -> foo.tar, foo.tar.gz -> foo.tar
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        # Do not leave the file behind that was opened above.
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    t._extfileobj = False
    return t

gzopen = classmethod(gzopen)
970
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       External file objects are not supported. Raises ValueError for an
       invalid mode or if fileobj is given, CompressionError if the bz2
       module is not available and ReadError if the archive cannot be
       read as a bzip2 file.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    # Checked before name is looked at: with a file object, name may be
    # None.
    if fileobj is not None:
        raise ValueError("no support for external file objects")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the tar archive inside the bzip2 file:
    # foo.tbz2 -> foo.tar, foo.tar.bz2 -> foo.tar
    pre, ext = os.path.splitext(name)
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    try:
        t = cls.taropen(tarname, mode,
                        bz2.BZ2File(name, mode, compresslevel=compresslevel))
    except IOError:
        raise ReadError("not a bzip2 file")
    t._extfileobj = False
    return t

bz2open = classmethod(bz2open)
1002
# All *open() methods are registered here.
# Maps a compression type, as used in the mode string of open(), to the
# name of the classmethod that opens archives of that type.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1009
1010 #--------------------------------------------------------------------------
1011 # The public methods which TarFile provides:
1012
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. A file
       object that was passed in by the caller is left open.
    """
    if not self.closed:
        if self._mode in "aw":
            # end-of-archive marker
            trailer = NUL * (BLOCKSIZE * 2)
            self.fileobj.write(trailer)
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))

        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1031 self.closed = True
1032
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    self._check()
    if name not in self.membernames:
        # Unknown so far: scan the rest of the archive (unless that has
        # been done already) and look again.
        if not self._loaded:
            self._load()
        if name not in self.membernames:
            raise KeyError("filename %r not found" % name)
    return self._getmember(name)
1044 return self._getmember(name)
1045
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # To obtain a list of all members, the whole archive has to be
    # scanned first.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1054 return self.members
1055
def getnames(self):
    """Return the names of the archive members as a list.

    The list has the same order as the list returned by getmembers().
    The internal name list itself is returned, not a copy.
    """
    self._check()
    # Scan the remainder of the archive if that has not happened yet.
    if self._loaded:
        return self.membernames
    self._load()
    return self.membernames
1064
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
    object `fileobj' (using os.fstat on its file descriptor).

    You can modify some of the TarInfo's attributes before you add it
    using addfile().  If given, `arcname' specifies an alternative name
    for the file in the archive.

    Returns None if the file is neither a regular file, directory,
    fifo, symbolic link, character device nor block device.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and inode in self.inodes \
           and arcname != self.inodes[inode]:
            # Is it a hardlink to an already
            # archived file?  A file that is added again under the
            # very same archive name is stored as a regular file,
            # otherwise the link would point at itself.
            filetype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            filetype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        filetype = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        filetype = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        filetype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        filetype = CHRTYPE
    elif stat.S_ISBLK(stmd):
        filetype = BLKTYPE
    else:
        # Unsupported kind of file.
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    if stat.S_ISDIR(stmd):
        # For a directory, the size must be 0
        tarinfo.size = 0
    else:
        tarinfo.size = statres.st_size
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = filetype
    tarinfo.linkname = linkname
    # Symbolic owner names are optional: they are only filled in when
    # pwd/grp are available and know the numeric id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if filetype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1164
1165 def list(self, verbose=True):
1166 """Print a table of contents to sys.stdout. If `verbose' is False, only
1167 the names of the members are printed. If it is True, an `ls -l'-like
1168 output is produced.
1169 """
1170 self._check()
1171
1172 for tarinfo in self:
1173 if verbose:
1174 print filemode(tarinfo.mode),
1175 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1176 tarinfo.gname or tarinfo.gid),
1177 if tarinfo.ischr() or tarinfo.isblk():
1178 print "%10s" % ("%d,%d" \
1179 % (tarinfo.devmajor, tarinfo.devminor)),
1180 else:
1181 print "%10d" % tarinfo.size,
1182 print "%d-%02d-%02d %02d:%02d:%02d" \
1183 % time.localtime(tarinfo.mtime)[:6],
1184
1185 print tarinfo.name,
1186
1187 if verbose:
1188 if tarinfo.issym():
1189 print "->", tarinfo.linkname,
1190 if tarinfo.islnk():
1191 print "link to", tarinfo.linkname,
1192 print
1193
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive.

    `name' may be any type of file (directory, fifo, symbolic link,
    etc.).  If given, `arcname' specifies an alternative name for the
    file in the archive.  Directories are added recursively by default.
    This can be avoided by setting `recursive' to False.

    The archive file itself and files of unsupported type are skipped
    with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
       and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # file() is used on purpose: the module-level name open is
        # rebound to TarFile.open at the end of this module.
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Do not leak the handle when addfile() fails.
            f.close()

    if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
        # These members carry no data blocks.
        tarinfo.size = 0
        self.addfile(tarinfo)

    if tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))
1246
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

    If `fileobj' is given, tarinfo.size bytes are read from it and
    added to the archive.  You can create TarInfo objects using
    gettarinfo().  On Windows platforms, `fileobj' should always be
    opened with mode 'rb' to avoid irritation about the file size.

    ValueError is raised in posix mode when the size, the link name or
    the member name does not fit into the header; otherwise GNU
    extensions are used for over-long names.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" \
                             % (LENGTH_LINK))
        # Emit the complete link name as a GNU extension member and
        # keep a truncated copy in the regular header.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split at the last "/" found within the first
            # LENGTH_PREFIX + 1 characters: the part before it becomes
            # the prefix, the part after it the name.
            cut = tarinfo.name.rfind("/", 0, LENGTH_PREFIX + 1) + 1
            tail = tarinfo.name[cut:]
            head = tarinfo.name[:cut][:-1]

            if not head or len(tail) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" \
                                 % (LENGTH_NAME))

            tarinfo.name = tail
            tarinfo.prefix = head
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        written = tarinfo.size
        leftover = written % BLOCKSIZE
        if leftover > 0:
            # Pad the data up to a full block.
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            written += BLOCKSIZE - leftover
        self.offset += written

    self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001313
def extract(self, member, path=""):
    """Extract a member from the archive to the current working
    directory, using its full name.

    Its file information is extracted as accurately as possible.
    `member' may be a filename or a TarInfo object.  You can specify a
    different directory using `path'.

    Errors are re-raised or merely reported through _dbg(), depending
    on self.errorlevel: EnvironmentError is raised for errorlevel > 0,
    ExtractError for errorlevel > 1.
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError:
        if self.errorlevel > 0:
            raise
        # The exception instance is fetched via sys.exc_info().
        err = sys.exc_info()[1]
        if err.filename is None:
            self._dbg(1, "tarfile: %s" % err.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (err.strerror, err.filename))
    except ExtractError:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1346
def extractfile(self, member):
    """Extract a member from the archive as a file object.

    `member' may be a filename or a TarInfo object.  If `member' is a
    regular file, a file-like object is returned.  If `member' is a
    link, a file-like object is constructed from the link's target.
    If `member' is none of the above, None is returned.
    The file-like object is read-only and provides the following
    methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # If a member's type is unknown, it is treated as a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory,
        # chrdev, blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.  The
    # target is searched among the members preceding the link.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1385
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
    file called targetpath.

    Missing parent directories are created on a best-effort basis.
    After the member has been created, owner, mode (not for symbolic
    links) and modification time are applied.
    """
    # Build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        # rwx for user, group and others (octal 777).
        ti.mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        try:
            self._extract_member(ti, ti.name)
        except (EnvironmentError, ExtractError):
            # Best effort only: if the directory really could not be
            # created, extracting the member itself fails below.
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1438
1439 #--------------------------------------------------------------------------
1440 # Below are the different file methods. They are called via
1441 # _extract_member() when extract() is called. They can be replaced in a
1442 # subclass to implement other functionality.
1443
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

    An already existing targetpath is not treated as an error; every
    other failure of os.mkdir() is passed on to the caller.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only "already exists" is tolerated.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
1451 raise
1452
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

    The data of `tarinfo' is obtained through extractfile() and copied
    into a newly created file.  Both file objects are closed even when
    creating the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() is used on purpose: the module-level name open is
        # rebound to TarFile.open at the end of this module.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1461
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
    at targetpath.

    The member is written like a regular file; afterwards a debug
    message naming the type is emitted.
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, extracted as regular file." \
             % tarinfo.type
    self._dbg(1, notice)
1469
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

    ExtractError is raised if the os module provides no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1477
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

    ExtractError is raised if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching device type flag.
    if tarinfo.isblk():
        devflag = stat.S_IFBLK
    else:
        devflag = stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)

    os.mknod(targetpath, tarinfo.mode | devflag, device)
1492
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath.

    If it cannot be created (platform limitation), we try to make a
    copy of the referenced file instead of a link: first by extracting
    the referenced member from the archive, then by copying the file
    from the filesystem.  IOError is raised if all of that fails.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # No link was made; fall through to the copy strategies below.
        pass
    else:
        return

    if tarinfo.issym():
        # A symlink target is relative to the directory of the link.
        linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                         linkpath))

    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        try:
            shutil.copy2(os.path.normpath(linkpath), targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1519
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

    Nothing happens unless the process runs with effective uid 0.
    Group and user are resolved by name first, then by numeric id,
    and finally default to the ids of the current process.
    ExtractError is raised if the ownership cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        group_id = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            group_id = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            group_id = os.getgid()

    try:
        user_id = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            user_id = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            user_id = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, user_id, group_id)
        elif sys.platform != "os2emx":
            os.chown(targetpath, user_id, group_id)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1547
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Does nothing if the os module provides no chmod().  ExtractError
    is raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001556
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

    Access and modification time are both set to tarinfo.mtime.
    Nothing is done if os.utime() does not exist, or for directories
    on win32.  ExtractError is raised if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    on_windows = sys.platform == "win32"
    if on_windows and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1570
1571 #--------------------------------------------------------------------------
1572
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    ReadError is raised if the very first block of the archive cannot
    be parsed as a header and self.ignore_zeros is not set.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Report the block and try the one after it.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    # The table holds plain functions, so self is passed explicitly.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # name[-1:] is the last character of the name (name[:-1] would be
    # everything *but* the last character).
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        # some old tar programs don't know DIRTYPE
        tarinfo.type = DIRTYPE

    self._record_member(tarinfo)
    return tarinfo
1639
1640 #--------------------------------------------------------------------------
1641 # Below are some methods which are called for special typeflags in the
1642 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1643 # are registered in TYPE_METH below. You can register your own methods
1644 # with this mapping.
1645 # A registered method is called with a TarInfo object as only argument.
1646 #
1647 # During its execution the method MUST perform the following tasks:
1648 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1649 # if there is data to follow.
1650 # 2. set self.offset to the position where the next member's header will
1651 # begin.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001652 # 3. call self._record_member() if the tarinfo object is supposed to
1653 # appear as a member of the TarFile object.
1654 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001655
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
    or longlink member.

    The data blocks following `tarinfo' contain the long name (or
    link name).  They are read, the header after them is fetched with
    next(), and the long name is assigned to that member, which is
    then returned.  None is returned if no header follows.
    """
    chunks = []
    count = tarinfo.size
    while count > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header
    member = self.next()
    if member is None:
        # There is no header after the name blocks, so there is no
        # member the name could be applied to.
        return None

    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(buf)
        # next() recorded the member under the name found in its own
        # header; keep the list of names in step with the new name.
        if self.members and self.members[-1] is member:
            self.membernames[-1] = member.name
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(buf)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001678
1679 def proc_sparse(self, tarinfo):
1680 """Analyze a GNU sparse header plus extra headers.
1681 """
1682 buf = tarinfo.tobuf()
1683 sp = _ringbuffer()
1684 pos = 386
1685 lastpos = 0L
1686 realpos = 0L
1687 # There are 4 possible sparse structs in the
1688 # first header.
1689 for i in xrange(4):
1690 try:
1691 offset = int(buf[pos:pos + 12], 8)
1692 numbytes = int(buf[pos + 12:pos + 24], 8)
1693 except ValueError:
1694 break
1695 if offset > lastpos:
1696 sp.append(_hole(lastpos, offset - lastpos))
1697 sp.append(_data(offset, numbytes, realpos))
1698 realpos += numbytes
1699 lastpos = offset + numbytes
1700 pos += 24
1701
1702 isextended = ord(buf[482])
1703 origsize = int(buf[483:495], 8)
1704
1705 # If the isextended flag is given,
1706 # there are extra headers to process.
1707 while isextended == 1:
1708 buf = self.fileobj.read(BLOCKSIZE)
1709 self.offset += BLOCKSIZE
1710 pos = 0
1711 for i in xrange(21):
1712 try:
1713 offset = int(buf[pos:pos + 12], 8)
1714 numbytes = int(buf[pos + 12:pos + 24], 8)
1715 except ValueError:
1716 break
1717 if offset > lastpos:
1718 sp.append(_hole(lastpos, offset - lastpos))
1719 sp.append(_data(offset, numbytes, realpos))
1720 realpos += numbytes
1721 lastpos = offset + numbytes
1722 pos += 24
1723 isextended = ord(buf[504])
1724
1725 if lastpos < origsize:
1726 sp.append(_hole(lastpos, origsize - lastpos))
1727
1728 tarinfo.sparse = sp
1729
1730 tarinfo.offset_data = self.offset
1731 self.offset += self._block(tarinfo.size)
1732 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001733
1734 self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001735 return tarinfo
1736
# The type mapping for the next() method. The keys are single character
# strings, the typeflag. The values are methods which are called when
# next() encounters such a typeflag.
# The values are stored as plain functions; next() calls them with the
# TarFile object as explicit first argument. GNU longname and longlink
# members share one handler.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE: proc_sparse
}
1745
1746 #--------------------------------------------------------------------------
1747 # Little helper methods:
1748
def _block(self, count):
    """Round up a byte count to the next multiple of BLOCKSIZE and
    return it, e.g. _block(834) => 1024.
    """
    full_blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partly filled block still occupies a whole block.
        full_blocks += 1
    return full_blocks * BLOCKSIZE
1757
1758 def _getmember(self, name, tarinfo=None):
1759 """Find an archive member by name from bottom to top.
1760 If tarinfo is given, it is used as the starting point.
1761 """
1762 if tarinfo is None:
1763 end = len(self.members)
1764 else:
1765 end = self.members.index(tarinfo)
1766
1767 for i in xrange(end - 1, -1, -1):
1768 if name == self.membernames[i]:
1769 return self.members[i]
1770
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001771 def _record_member(self, tarinfo):
1772 """Record a tarinfo object in the internal datastructures.
1773 """
1774 self.members.append(tarinfo)
1775 self.membernames.append(tarinfo.name)
1776
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001777 def _load(self):
1778 """Read through the entire archive file and look for readable
1779 members.
1780 """
1781 while True:
1782 tarinfo = self.next()
1783 if tarinfo is None:
1784 break
1785 self._loaded = True
1786
1787 def _check(self, mode=None):
1788 """Check if TarFile is still open, and if the operation's mode
1789 corresponds to TarFile's mode.
1790 """
1791 if self.closed:
1792 raise IOError, "%s is closed" % self.__class__.__name__
1793 if mode is not None and self._mode not in mode:
1794 raise IOError, "bad operation for mode %r" % self._mode
1795
1796 def __iter__(self):
1797 """Provide an iterator object.
1798 """
1799 if self._loaded:
1800 return iter(self.members)
1801 else:
1802 return TarIter(self)
1803
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.

    It consists of an extended tar header, with the length
    of the longname as size, followed by data blocks,
    which contain the longname as a null terminated string.
    `type' is the typeflag stored in the extended header.
    """
    name += NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(name)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE
    # write name blocks, padded up to a full block
    self.fileobj.write(name)
    written = header.size
    leftover = written % BLOCKSIZE
    if leftover > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - leftover))
        written += BLOCKSIZE - leftover
    self.offset += written
1828
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

    The message is only printed if `level' does not exceed self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
1834# class TarFile
1835
class TarIter:
    """Iterator Class.

    for tarinfo in TarFile(...):
        suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the given TarFile.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return iterator object (the TarIter itself).
        """
        return self

    def next(self):
        """Return the next item using TarFile's next() method.
        When all members have been read, set TarFile as _loaded
        and stop the iteration.
        """
        member = self.tarfile.next()
        if member:
            return member
        self.tarfile._loaded = True
        raise StopIteration
1860
1861# Helper classes for sparse file support
1862class _section:
1863 """Base class for _data and _hole.
1864 """
1865 def __init__(self, offset, size):
1866 self.offset = offset
1867 self.size = size
1868 def __contains__(self, offset):
1869 return self.offset <= offset < self.offset + self.size
1870
class _data(_section):
    """Represent a data section in a sparse file.

    In addition to offset and size, `realpos' is stored.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1877
class _hole(_section):
    """Represent a hole section in a sparse file.

    A hole adds nothing to _section; the class only marks the kind of
    section.
    """
1882
1883class _ringbuffer(list):
1884 """Ringbuffer class which increases performance
1885 over a regular list.
1886 """
1887 def __init__(self):
1888 self.idx = 0
1889 def find(self, offset):
1890 idx = self.idx
1891 while True:
1892 item = self[idx]
1893 if offset in item:
1894 break
1895 idx += 1
1896 if idx == len(self):
1897 idx = 0
1898 if idx == self.idx:
1899 # End of File
1900 return None
1901 self.idx = idx
1902 return item
1903
1904#---------------------------------------------
1905# zipfile compatible TarFile class
1906#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
    ZipFile class.

    All work is delegated to a TarFile object kept in self.tarfile.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' as plain (TAR_PLAIN) or gzip compressed
        (TAR_GZIPPED) archive.  ValueError is raised for any other
        `compression' value.
        """
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attributes of a zipfile.ZipInfo.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of all members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print a table of contents via TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called `name'."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete data of the member called `name'."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add `filename' to the archive; `compress_type' is ignored."""
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member described by `zinfo'.

        The zipfile style attributes filename, file_size and date_time
        of `zinfo' are copied to name, size and mtime first.
        """
        import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
#class TarFileCompat
1952
1953#--------------------
1954# exported functions
1955#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.

    The archive is opened and closed again right away; only TarError
    is taken as "not a tar archive", other exceptions propagate.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1966
# Module-level alias for TarFile.open.  Within this module the name
# open therefore no longer refers to the builtin of the same name.
open = TarFile.open