blob: 64d006deab4231d12804a14b529fe21375869cda [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision$"
34# $Source$
35
36version = "0.6.4"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date$"
39__cvsid__ = "$Id$"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52
if sys.platform == "mac":
    # Classic MacOS is not supported.  In many places this module converts
    # pathnames by simply substituting "/" with the local os.path.sep, and
    # that cannot express the mac rooted:path:name versus
    # :nonrooted:path:name syntax.  Refuse to import rather than misbehave.
    raise ImportError("tarfile does not work for platform==mac")
59
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060try:
61 import grp, pwd
62except ImportError:
63 grp = pwd = None
64
65# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = 20 * BLOCKSIZE         # length of records (20 blocks)
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1        # maximum size of a file
                                    # (11 octal digits, all sevens)

# Values of the one-character type flag in a member header.
REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

#---------------------------------------------------------
# Bits used in the mode field.  The literals are written in
# hexadecimal; the traditional octal value is given in the
# comment next to each one.
#---------------------------------------------------------
S_IFLNK = 0xA000                    # 0120000  symbolic link
S_IFREG = 0x8000                    # 0100000  regular file
S_IFBLK = 0x6000                    # 0060000  block device
S_IFDIR = 0x4000                    # 0040000  directory
S_IFCHR = 0x2000                    # 0020000  character device
S_IFIFO = 0x1000                    # 0010000  fifo

TSUID = 0x800                       # 04000  set UID on execution
TSGID = 0x400                       # 02000  set GID on execution
TSVTX = 0x200                       # 01000  reserved

TUREAD = 0x100                      # 0400  read by owner
TUWRITE = 0x080                     # 0200  write by owner
TUEXEC = 0x040                      # 0100  execute/search by owner
TGREAD = 0x020                      # 0040  read by group
TGWRITE = 0x010                     # 0020  write by group
TGEXEC = 0x008                      # 0010  execute/search by group
TOREAD = 0x004                      # 0004  read by other
TOWRITE = 0x002                     # 0002  write by other
TOEXEC = 0x001                      # 0001  execute/search by other
131
132#---------------------------------------------------------
133# Some useful functions
134#---------------------------------------------------------
def nts(s):
    """Convert a null-terminated string buffer to a python string.

    The result is everything in front of the first NUL character; if s
    contains no NUL at all it is returned unchanged.  Cutting at the
    first NUL (instead of merely stripping trailing NULs) keeps stale
    bytes that follow the terminator inside a fixed-width header field
    out of the result.
    """
    end = s.find("\0")
    if end == -1:
        return s
    return s[:end]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000139
def calc_chksum(buf):
    """Calculate the checksum for a member's header.

    buf is the string buffer holding the header.  The checksum is the
    plain sum of the byte values of buf, except that the eight bytes of
    the chksum field itself (offsets 148 to 155) are counted as if they
    were blanks.
    """
    blanks = 256                        # 8 * ord(" ") for the chksum field
    before = sum(map(ord, buf[:148]))   # all bytes in front of chksum
    after = sum(map(ord, buf[156:]))    # all bytes behind chksum
    return blanks + before + after
150
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire remaining content of src is copied by
    shutil.copyfileobj().  Otherwise the data is moved in chunks of at
    most 16 KiB and IOError is raised as soon as src delivers fewer
    bytes than were asked for.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes; a short read means src ended early.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)

    done = 0
    while done < full_chunks:
        transfer(chunk_size)
        done += 1

    if tail:
        transfer(tail)
    return
175
# One entry per character of the mode string.  Every entry is a flat
# sequence of (bitmask, character) pairs which filemode() tests from left
# to right, taking the first mask that is completely set in the mode.
# Combined masks therefore have to come before the single bits they
# contain, otherwise "s" and "t" could never be selected.
filemode_table = (
    (S_IFLNK, "l",
     S_IFREG, "-",
     S_IFBLK, "b",
     S_IFDIR, "d",
     S_IFCHR, "c",
     S_IFIFO, "p"),
    (TUREAD, "r"),
    (TUWRITE, "w"),
    (TUEXEC|TSUID, "s", TSUID, "S", TUEXEC, "x"),
    (TGREAD, "r"),
    (TGWRITE, "w"),
    (TGEXEC|TSGID, "s", TSGID, "S", TGEXEC, "x"),
    (TOREAD, "r"),
    (TOWRITE, "w"),
    (TOEXEC|TSVTX, "t", TSVTX, "T", TOEXEC, "x"))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()

       One character is produced per entry of filemode_table: the
       character paired with the first bitmask of the entry that is
       fully contained in mode, or "-" if none of them is.
    """
    chars = []
    for entry in filemode_table:
        for i in range(0, len(entry), 2):
            mask = entry[i]
            if mode & mask == mask:
                chars.append(entry[i + 1])
                break
        else:
            # none of the masks of this entry is set
            chars.append("-")
    return "".join(chars)
210
if os.sep == "/":
    # The native separator already is the one used inside archives.
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
215
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
231
232#---------------------------
233# internal stream interface
234#---------------------------
235class _LowLevelFile:
236 """Low-level file object. Supports reading and writing.
237 It is used instead of a regular file object for streaming
238 access.
239 """
240
241 def __init__(self, name, mode):
242 mode = {
243 "r": os.O_RDONLY,
244 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
245 }[mode]
246 if hasattr(os, "O_BINARY"):
247 mode |= os.O_BINARY
248 self.fd = os.open(name, mode)
249
250 def close(self):
251 os.close(self.fd)
252
253 def read(self, size):
254 return os.read(self.fd, size)
255
256 def write(self, s):
257 os.write(self.fd, s)
258
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, type, fileobj, bufsize):
        """Construct a _Stream object.

           name    -- file name; opened with _LowLevelFile if fileobj
                      is None
           mode    -- "r" or "w"
           type    -- "tar" (no compression), "gz" or "bz2"
           fileobj -- object to read from / write to, or None
           bufsize -- number of bytes moved per access to fileobj

           Raises CompressionError if the module needed for type is
           not available.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        self.name = name or ""
        self.mode = mode
        self.type = type
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""
        self.pos = long(0)
        self.closed = False
        # (de)compressor object; stays None for uncompressed streams so
        # that close() can tell whether there is anything to flush
        self.cmp = None

        if type == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if type == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # "closed" is missing if __init__ failed before it got that far
        # (e.g. the file could not be opened); there is nothing to close
        # in that case.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # negative wbits: raw deflate data, the gzip framing is written
        # by hand below and in close()
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        # magic, method (deflate), flags (8: a file name follows),
        # modification time, extra flags, operating system
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.type == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.type != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Always drain the compressor first.  It may still hold all of
        # the data written so far even though self.buf is empty, so the
        # flush must not depend on the state of the buffer.
        if self.mode == "w" and self.cmp is not None:
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.type == "gz":
                # gzip trailer: crc and length of the uncompressed data,
                # the length being reduced to its low 32 bits
                self.fileobj.write(struct.pack("<l", self.crc))
                self.fileobj.write(struct.pack("<L", self.pos & (2 ** 32 - 1)))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Checks the gzip magic and compression method and skips the
           rest of the header.  Raises ReadError if the magic does not
           match and CompressionError for an unknown method.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)                  # modification time, xfl, os

        if flag & 4:
            # The extra field is part of the uncompressed header, so it
            # has to be skipped with the raw __read(); read() would push
            # it through the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)              # header crc

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Returns the new position; raises StreamError if pos lies
           in front of the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.type == "tar":
            return self.__read(size)

        # Decompress raw blocks until size bytes are available; what is
        # left over is kept in self.dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
469
470#------------------------
471# Extraction file object
472#------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Set up read access to the data of member tarinfo through
           the file object of the TarFile tarfile.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = long(0)
        self.linebuffer = ""
        # read() is bound per instance, depending on the member's type
        if not tarinfo.issparse():
            self.read = self._readnormal
        else:
            self.sparse = tarinfo.sparse
            self.read = self._readsparse

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).
        """
        if size < 0:
            size = sys.maxint

        newline = self.linebuffer.find("\n")
        if newline >= 0:
            # NOTE(review): when size is smaller than the newline's
            # position, the character at index size looks like it gets
            # dropped by the slicing further down -- confirm.
            newline = min(newline, size)
        else:
            # Refill the line buffer in pieces of at most 100 bytes until
            # a newline shows up, the data ends or size is used up.
            size -= len(self.linebuffer)
            while newline < 0:
                chunk = self.read(min(size, 100))
                if not chunk:
                    break
                self.linebuffer += chunk
                size -= len(chunk)
                if size <= 0:
                    break
                newline = self.linebuffer.find("\n")
            if newline == -1:
                # no newline found: hand out whatever has been buffered
                rest = self.linebuffer
                self.linebuffer = ""
                return rest

        line = self.linebuffer[:newline]
        self.linebuffer = self.linebuffer[newline + 1:]
        # remove carriage returns from the end of the line
        while line[-1:] == "\r":
            line = line[:-1]
        return line + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def _readnormal(self, size=None):
        """Read operation for regular files.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        remaining = self.size - self.pos
        if size is not None:
            count = min(size, remaining)
        else:
            count = remaining
        self.pos += count
        return self.__read(count)

    def _readsparse(self, size=None):
        """Read operation for sparse files.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        # collect section after section until size bytes are together
        pieces = []
        while size > 0:
            piece = self._readsparsesection(size)
            if not piece:
                break
            size -= len(piece)
            pieces.append(piece)
        return "".join(pieces)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        # never read beyond the end of the section
        toread = min(size, section.offset + section.size - self.pos)
        if not isinstance(section, _data):
            # every section that is not a _data section reads as NULs
            self.pos += toread
            return NUL * toread

        realpos = section.realpos + self.pos - section.offset
        self.pos += toread
        self.fileobj.seek(self.offset + realpos)
        return self.__read(toread)

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (absolute), 1 (relative to the current position)
           or 2 (relative to the end).  The resulting position is limited
           to the range from 0 to the member's size.  The line buffer of
           readline() is discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        elif whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        elif whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True
#class ExFileObject
615
616#------------------
617# Exported Classes
618#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """

        self.name = name        # member name (dirnames must end with '/')
        self.mode = (TUREAD | TUWRITE |     # file permissions,
                     TGREAD | TGWRITE |     # 0666 octal (rw-rw-rw-)
                     TOREAD | TOWRITE)
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       #-
        self.devminor = 0       #-for use with CHRTYPE and BLKTYPE
        self.prefix = ""        # prefix to filename or holding information
                                # about sparse files

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           The header fields are taken from their fixed positions in
           buf.  A ValueError from int()/long() on a malformed numeric
           field is passed on to the caller, except for the two device
           number fields.
        """
        tarinfo = cls()
        tarinfo.name = nts(buf[0:100])
        tarinfo.mode = int(buf[100:108], 8)
        tarinfo.uid = int(buf[108:116],8)
        tarinfo.gid = int(buf[116:124],8)

        # There are two possible codings for the size field we
        # have to discriminate, see comment in tobuf() below.
        if buf[124] != "\200":
            tarinfo.size = long(buf[124:136], 8)
        else:
            # binary coding: the eleven bytes after the marker byte,
            # most significant byte first
            tarinfo.size = long(0)
            for i in range(11):
                tarinfo.size <<= 8
                tarinfo.size += ord(buf[125 + i])

        tarinfo.mtime = long(buf[136:148], 8)
        tarinfo.chksum = int(buf[148:156], 8)
        tarinfo.type = buf[156:157]
        tarinfo.linkname = nts(buf[157:257])
        tarinfo.uname = nts(buf[265:297])
        tarinfo.gname = nts(buf[297:329])
        try:
            tarinfo.devmajor = int(buf[329:337], 8)
            tarinfo.devminor = int(buf[337:345], 8)
        except ValueError:
            # Unusable device numbers: reset both of them (the original
            # assigned devmajor twice and never mentioned devminor).
            tarinfo.devmajor = tarinfo.devminor = 0
        tarinfo.prefix = buf[345:500]

        # The prefix field is used for filenames > 100 in
        # the POSIX standard.
        # name = prefix + '/' + name
        # (not for sparse members, whose prefix field holds information
        # about the sparse file instead)
        if tarinfo.type != GNUTYPE_SPARSE:
            tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))

        # Directory names should have a '/' at the end.
        if tarinfo.isdir() and tarinfo.name[-1:] != "/":
            tarinfo.name += "/"
        return tarinfo

    frombuf = classmethod(frombuf)

    def tobuf(self):
        """Return a tar header block as a 512 byte string.

           The block is also stored in self.buf.
        """
        # Prefer the size to be encoded as 11 octal ascii digits
        # which is the most portable. If the size exceeds this
        # limit (>= 8 GB), encode it as an 88-bit value which is
        # a GNU tar feature.
        if self.size <= MAXSIZE_MEMBER:
            size = "%011o" % self.size
        else:
            s = self.size
            size = ""
            for i in range(11):
                size = chr(s & 0xFF) + size
                s >>= 8
            size = "\200" + size        # marker byte for the binary coding

        # The following code was contributed by Detlef Lannert.
        parts = []
        for value, fieldsize in (
                (self.name, 100),
                # 0xFFF is 07777 octal
                ("%07o" % (self.mode & 0xFFF), 8),
                ("%07o" % self.uid, 8),
                ("%07o" % self.gid, 8),
                (size, 12),
                ("%011o" % self.mtime, 12),
                ("        ", 8),        # chksum field, blank for now
                (self.type, 1),
                (self.linkname, 100),
                (MAGIC, 6),
                (VERSION, 2),
                (self.uname, 32),
                (self.gname, 32),
                ("%07o" % self.devmajor, 8),
                ("%07o" % self.devminor, 8),
                (self.prefix, 155)
            ):
            # cut every value to its field size and pad it with NULs
            l = len(value)
            parts.append(value[:fieldsize] + (fieldsize - l) * NUL)

        buf = "".join(parts)
        chksum = calc_chksum(buf)
        # Put in the checksum as six octal digits and a NUL; the last
        # byte of the field keeps the blank it was filled with.
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        buf += (BLOCKSIZE - len(buf)) * NUL
        self.buf = buf
        return buf

    # Type predicates, all based on the type field of the header.
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
767
768class TarFile(object):
769 """The TarFile Class provides an interface to tar archives.
770 """
771
772 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
773
774 dereference = False # If true, add content of linked file to the
775 # tar file, else the link.
776
777 ignore_zeros = False # If true, skips empty or invalid blocks and
778 # continues processing.
779
780 errorlevel = 0 # If 0, fatal errors only appear in debug
781 # messages (if debug >= 0). If > 0, errors
782 # are passed to the caller as exceptions.
783
784 posix = True # If True, generates POSIX.1-1990-compliant
785 # archives (no GNU extensions!)
786
787 fileobject = ExFileObject
788
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       Raises ValueError for any other `mode'.
    """
    self.name = name

    # Compare against the complete mode strings.  A substring test
    # against "raw" would let the empty string pass, which then fails
    # with a KeyError in the lookup below.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(self.name, self.mode)
        self._extfileobj = False
    else:
        if self.name is None and hasattr(fileobj, "name"):
            self.name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.fileobj = fileobj

    # Init datastructures
    self.closed = False
    self.members = []           # list of members as TarInfo objects
    self.membernames = []       # names of members
    self._loaded = False        # flag if all members have been read
    self.offset = long(0)       # current position in the archive file
    self.inodes = {}            # dictionary caching the inodes of
                                # archive members already added

    try:
        if self._mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self._mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                try:
                    tarinfo = self.next()
                except ReadError:
                    self.fileobj.seek(0)
                    break
                if tarinfo is None:
                    self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self._mode in "aw":
            self._loaded = True
    except:
        # Do not leave behind an open file that was opened here; the
        # exception itself is passed on unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
845
846 #--------------------------------------------------------------------------
847 # Below are the classmethods which act as alternate constructors to the
848 # TarFile class. The open() method is the only one that is needed for
849 # public use; it is the "super"-constructor and is able to select an
850 # adequate "sub"-constructor for a particular compression using the mapping
851 # from OPEN_METH.
852 #
853 # This concept allows one to subclass TarFile without losing the comfort of
854 # the super-constructor. A sub-constructor is registered and made available
855 # by adding it to the mapping in OPEN_METH.
856
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r'          open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or if the
       mode is not understood, CompressionError for an unknown
       compression type and ReadError if none of the registered methods
       can read the archive in mode 'r'.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # a substring test against "rw" would also accept "rw" itself
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    elif mode == "r":
        # A failed attempt may already have consumed data from a file
        # object passed in by the caller.  Remember where it stands, if
        # it can tell, so that every method starts at the same position.
        start = None
        if fileobj is not None:
            try:
                start = fileobj.tell()
            except (AttributeError, IOError):
                start = None

        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if start is not None:
                    fileobj.seek(start)
                continue
        raise ReadError("file could not be opened successfully")

    # complete mode strings only: "" and "aw" are not valid modes
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")

open = classmethod(open)
923
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       mode is 'r', 'a' or 'w'; anything else raises ValueError.
    """
    # Compare against the complete mode strings: a substring test
    # against "raw" would let the empty string pass.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)

taropen = classmethod(taropen)
932
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       name may be None if fileobj is given.  Raises ValueError for a
       mode other than 'r' or 'w', CompressionError if the gzip module
       cannot be used and ReadError if opening fails with an IOError.
    """
    # complete mode strings only; "" is not a valid mode
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the tar archive from the file name.  There is
    # nothing to derive it from if only a file object was passed in.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # any other failure: only make sure the file opened above does
        # not stay open, then pass the exception on
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t

gzopen = classmethod(gzopen)
970
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w' and if a
       fileobj is passed (external file objects are not supported),
       CompressionError if the bz2 module is not available and
       ReadError if opening fails with an IOError.
    """
    # complete mode strings only; "" is not a valid mode
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Reject file objects before looking at name: with a fileobj, name
    # may well be None and must not be taken apart.
    if fileobj is not None:
        raise ValueError("no support for external file objects")

    pre, ext = os.path.splitext(name)
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    try:
        t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
    except IOError:
        raise ReadError("not a bzip2 file")
    t._extfileobj = False
    return t

bz2open = classmethod(bz2open)
1002
1003 # All *open() methods are registered here.
1004 OPEN_METH = {
1005 "tar": "taropen", # uncompressed tar
1006 "gz": "gzopen", # gzip compressed tar
1007 "bz2": "bz2open" # bzip2 compressed tar
1008 }
1009
1010 #--------------------------------------------------------------------------
1011 # The public methods which TarFile provides:
1012
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on a TarFile that is already closed does nothing.
    """
    if not self.closed:
        if self._mode in "aw":
            end_marker = BLOCKSIZE * 2
            self.fileobj.write(NUL * end_marker)
            self.offset += end_marker
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            used = self.offset % RECORDSIZE
            if used > 0:
                self.fileobj.write(NUL * (RECORDSIZE - used))

        # a file object handed in by the caller stays open
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
1032
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    self._check()
    if name not in self.membernames:
        # The name may simply not have been read yet: load the rest of
        # the archive before giving up.
        if not self._loaded:
            self._load()
        if name not in self.membernames:
            raise KeyError("filename %r not found" % name)
    return self._getmember(name)
1045
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # To obtain a list of all members, the whole
    # archive has to be scanned first.
    self._load()
    return self.members
1055
def getnames(self):
    """Return the list of member names recorded for the archive
       (self.membernames), scanning the whole archive first if that has
       not happened yet. The names are in the same order as the list
       returned by getmembers().
    """
    self._check()
    if self._loaded:
        return self.membernames
    self._load()
    return self.membernames
1064
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing an existing file. The file is
       given either by its path `name' or by the open file object
       `fileobj'; in the latter case os.fstat() is applied to its file
       descriptor and fileobj.name replaces `name'. `arcname', if given,
       is the name the member gets in the archive. None is returned for
       file types that are not supported. The result may be modified
       before it is passed to addfile().
    """
    self._check("aw")

    # A file object brings its own name along.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: normalize it, drop a drive
    # specification and make absolute paths relative.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.lstrip("/")

    tarinfo = TarInfo()

    # Pick the stat flavour: fstat for file objects, lstat if
    # available and symlinks are not to be followed, else stat.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)
    linkname = ""

    mode = st.st_mode
    if stat.S_ISREG(mode):
        inode = (st.st_ino, st.st_dev)
        if inode in self.inodes and not self.dereference:
            # Same inode as an already archived file:
            # store it as a hardlink to that member.
            typeflag = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # Remember the inode, but only if it is valid.
            # For win32 it is always 0.
            typeflag = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(mode):
        typeflag = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(mode):
        typeflag = FIFOTYPE
    elif stat.S_ISLNK(mode):
        typeflag = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(mode):
        typeflag = CHRTYPE
    elif stat.S_ISBLK(mode):
        typeflag = BLKTYPE
    else:
        return None

    # Copy everything the stat result tells us into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    tarinfo.size = st.st_size
    tarinfo.mtime = st.st_mtime
    tarinfo.type = typeflag
    tarinfo.linkname = linkname

    # Symbolic owner names are optional: they need the pwd/grp
    # modules and an entry for the numeric id.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if typeflag in (CHRTYPE, BLKTYPE) \
            and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(st.st_rdev)
        tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1160
1161 def list(self, verbose=True):
1162 """Print a table of contents to sys.stdout. If `verbose' is False, only
1163 the names of the members are printed. If it is True, an `ls -l'-like
1164 output is produced.
1165 """
1166 self._check()
1167
1168 for tarinfo in self:
1169 if verbose:
1170 print filemode(tarinfo.mode),
1171 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1172 tarinfo.gname or tarinfo.gid),
1173 if tarinfo.ischr() or tarinfo.isblk():
1174 print "%10s" % ("%d,%d" \
1175 % (tarinfo.devmajor, tarinfo.devminor)),
1176 else:
1177 print "%10d" % tarinfo.size,
1178 print "%d-%02d-%02d %02d:%02d:%02d" \
1179 % time.localtime(tarinfo.mtime)[:6],
1180
1181 print tarinfo.name,
1182
1183 if verbose:
1184 if tarinfo.issym():
1185 print "->", tarinfo.linkname,
1186 if tarinfo.islnk():
1187 print "link to", tarinfo.linkname,
1188 print
1189
1190 def add(self, name, arcname=None, recursive=True):
1191 """Add the file `name' to the archive. `name' may be any type of file
1192 (directory, fifo, symbolic link, etc.). If given, `arcname'
1193 specifies an alternative name for the file in the archive.
1194 Directories are added recursively by default. This can be avoided by
1195 setting `recursive' to False.
1196 """
1197 self._check("aw")
1198
1199 if arcname is None:
1200 arcname = name
1201
1202 # Skip if somebody tries to archive the archive...
1203 if self.name is not None \
1204 and os.path.abspath(name) == os.path.abspath(self.name):
1205 self._dbg(2, "tarfile: Skipped %r" % name)
1206 return
1207
1208 # Special case: The user wants to add the current
1209 # working directory.
1210 if name == ".":
1211 if recursive:
1212 if arcname == ".":
1213 arcname = ""
1214 for f in os.listdir("."):
1215 self.add(f, os.path.join(arcname, f))
1216 return
1217
1218 self._dbg(1, name)
1219
1220 # Create a TarInfo object from the file.
1221 tarinfo = self.gettarinfo(name, arcname)
1222
1223 if tarinfo is None:
1224 self._dbg(1, "tarfile: Unsupported type %r" % name)
1225 return
1226
1227 # Append the tar header and data to the archive.
1228 if tarinfo.isreg():
1229 f = file(name, "rb")
1230 self.addfile(tarinfo, f)
1231 f.close()
1232
1233 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1234 tarinfo.size = 0L
1235 self.addfile(tarinfo)
1236
1237 if tarinfo.isdir():
1238 self.addfile(tarinfo)
1239 if recursive:
1240 for f in os.listdir(name):
1241 self.add(os.path.join(name, f), os.path.join(arcname, f))
1242
def addfile(self, tarinfo, fileobj=None):
    """Write the member described by `tarinfo' to the archive. If
       `fileobj' is given, tarinfo.size bytes are copied from it after
       the header and padded to a full block. Names and link names that
       do not fit into the header raise ValueError in posix mode; in GNU
       mode they are stored in preceding LONGNAME/LONGLINK members. On
       Windows platforms `fileobj' should be opened in mode 'rb' so that
       the file size is reliable.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at the last "/" that still lies within
            # the first LENGTH_PREFIX + 1 characters.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            head = head[:head.rfind("/") + 1]
            tail = tarinfo.name[len(head):]
            head = head[:-1]

            if not head or len(tail) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = tail
            tarinfo.prefix = head
        else:
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    # The header block.
    self.fileobj.write(tarinfo.tobuf())
    self.offset += BLOCKSIZE

    # The data blocks, zero-padded to a multiple of BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, leftover = divmod(tarinfo.size, BLOCKSIZE)
        if leftover > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001309
1310 def extract(self, member, path=""):
1311 """Extract a member from the archive to the current working directory,
1312 using its full name. Its file information is extracted as accurately
1313 as possible. `member' may be a filename or a TarInfo object. You can
1314 specify a different directory using `path'.
1315 """
1316 self._check("r")
1317
1318 if isinstance(member, TarInfo):
1319 tarinfo = member
1320 else:
1321 tarinfo = self.getmember(member)
1322
Neal Norwitza4f651a2004-07-20 22:07:44 +00001323 # Prepare the link target for makelink().
1324 if tarinfo.islnk():
1325 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1326
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001327 try:
1328 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1329 except EnvironmentError, e:
1330 if self.errorlevel > 0:
1331 raise
1332 else:
1333 if e.filename is None:
1334 self._dbg(1, "tarfile: %s" % e.strerror)
1335 else:
1336 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1337 except ExtractError, e:
1338 if self.errorlevel > 1:
1339 raise
1340 else:
1341 self._dbg(1, "tarfile: %s" % e)
1342
def extractfile(self, member):
    """Return a file-like object for `member' (a member name or a
       TarInfo object). Regular files and members of unknown type yield
       a self.fileobject instance, links yield the file object of their
       target, and every other member type yields None. Extracting a
       link from a _Stream raises StreamError.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files, and members of unknown type, which are
    # treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.): there is nothing to hand out.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object. The
    # target is searched among the members preceding the link.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1381
1382 def _extract_member(self, tarinfo, targetpath):
1383 """Extract the TarInfo object tarinfo to a physical
1384 file called targetpath.
1385 """
1386 # Fetch the TarInfo object for the given name
1387 # and build the destination pathname, replacing
1388 # forward slashes to platform specific separators.
1389 if targetpath[-1:] == "/":
1390 targetpath = targetpath[:-1]
1391 targetpath = os.path.normpath(targetpath)
1392
1393 # Create all upper directories.
1394 upperdirs = os.path.dirname(targetpath)
1395 if upperdirs and not os.path.exists(upperdirs):
1396 ti = TarInfo()
1397 ti.name = upperdirs
1398 ti.type = DIRTYPE
1399 ti.mode = 0777
1400 ti.mtime = tarinfo.mtime
1401 ti.uid = tarinfo.uid
1402 ti.gid = tarinfo.gid
1403 ti.uname = tarinfo.uname
1404 ti.gname = tarinfo.gname
1405 try:
1406 self._extract_member(ti, ti.name)
1407 except:
1408 pass
1409
1410 if tarinfo.islnk() or tarinfo.issym():
1411 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1412 else:
1413 self._dbg(1, tarinfo.name)
1414
1415 if tarinfo.isreg():
1416 self.makefile(tarinfo, targetpath)
1417 elif tarinfo.isdir():
1418 self.makedir(tarinfo, targetpath)
1419 elif tarinfo.isfifo():
1420 self.makefifo(tarinfo, targetpath)
1421 elif tarinfo.ischr() or tarinfo.isblk():
1422 self.makedev(tarinfo, targetpath)
1423 elif tarinfo.islnk() or tarinfo.issym():
1424 self.makelink(tarinfo, targetpath)
1425 elif tarinfo.type not in SUPPORTED_TYPES:
1426 self.makeunknown(tarinfo, targetpath)
1427 else:
1428 self.makefile(tarinfo, targetpath)
1429
1430 self.chown(tarinfo, targetpath)
1431 if not tarinfo.issym():
1432 self.chmod(tarinfo, targetpath)
1433 self.utime(tarinfo, targetpath)
1434
1435 #--------------------------------------------------------------------------
1436 # Below are the different file methods. They are called via
1437 # _extract_member() when extract() is called. They can be replaced in a
1438 # subclass to implement other functionality.
1439
1440 def makedir(self, tarinfo, targetpath):
1441 """Make a directory called targetpath.
1442 """
1443 try:
1444 os.mkdir(targetpath)
1445 except EnvironmentError, e:
1446 if e.errno != errno.EEXIST:
1447 raise
1448
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
       member `tarinfo', obtained through extractfile(). Both file
       objects are closed even if creating or writing the target fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() is used on purpose: the module rebinds the name
        # open to TarFile.open.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
1457
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' the same way as
       a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
1465
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath' with os.mkfifo(). ExtractError
       is raised if the os module does not provide mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
1473
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath' with
       os.mknod(), using the member's mode and device numbers.
       ExtractError is raised if the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Everything that is not a block device is created as a
    # character device.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
1488
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'. If creating the
       link raises AttributeError (platform limitation), the referenced
       member is extracted to `targetpath' instead; if that fails as
       well, the referenced file is copied from the file system. IOError
       is raised when none of this succeeds.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        pass

    # Links cannot be created here, fall back to a copy. A symlink's
    # target is taken relative to the directory of the link itself.
    if tarinfo.issym():
        linkpath = normpath(os.path.join(os.path.dirname(tarinfo.name),
                                         linkpath))

    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        # Last resort: copy the file from the file system.
        linkpath = os.path.normpath(linkpath)
        try:
            shutil.copy2(linkpath, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
1515
def chown(self, tarinfo, targetpath):
    """Set owner and group of `targetpath' according to tarinfo. Nothing
       is done unless pwd is available and the effective user id is 0.
       The symbolic names are tried first, then the numeric ids, then the
       ids of the current process. ExtractError is raised if changing the
       owner fails.
    """
    # We have to be root to do so.
    if not pwd or not hasattr(os, "geteuid") or os.geteuid() != 0:
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
1543
def chmod(self, tarinfo, targetpath):
    """Apply tarinfo.mode to `targetpath'. Nothing is done if the os
       module has no chmod(); ExtractError is raised if the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001552
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
       tarinfo.mtime. Nothing is done if the os module has no utime() or
       for directories on win32; ExtractError is raised if the call
       fails.
    """
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if not hasattr(os, 'utime') \
            or (sys.platform == "win32" and tarinfo.isdir()):
        return
    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1566
1567 #--------------------------------------------------------------------------
1568
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available. ReadError is raised if already the first block of the
       archive is not a valid header (and ignore_zeros is not set).
    """
    self._check("ra")
    if self.firstmember is not None:
        # A member that was read ahead is handed out first.
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None
        try:
            tarinfo = TarInfo.frombuf(buf)
        except ValueError:
            if self.ignore_zeros:
                # Skip the block and keep looking for a header.
                if buf.count(NUL) == BLOCKSIZE:
                    adj = "empty"
                else:
                    adj = "invalid"
                self._dbg(2, "0x%X: %s block" % (self.offset, adj))
                self.offset += BLOCKSIZE
                continue
            else:
                # Block is empty or unreadable.
                if self.offset == 0:
                    # If the first block is invalid. That does not
                    # look like a tar archive we can handle.
                    raise ReadError("empty, unreadable or compressed file")
                return None
        break

    # We shouldn't rely on this checksum, because some tar programs
    # calculate it differently and it is merely validating the
    # header block. We could just as well skip this part, which would
    # have a slight effect on performance...
    if tarinfo.chksum != calc_chksum(buf):
        self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)

    # Set the TarInfo object's offset to the current position of the
    # TarFile and set self.offset to the position where the data blocks
    # should begin.
    tarinfo.offset = self.offset
    self.offset += BLOCKSIZE

    # Check if the TarInfo object has a typeflag for which a callback
    # method is registered in the TYPE_METH. If so, then call it.
    if tarinfo.type in self.TYPE_METH:
        return self.TYPE_METH[tarinfo.type](self, tarinfo)

    tarinfo.offset_data = self.offset
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)

    # The last character decides, hence the [-1:] slice; [:-1] would
    # compare the name without its last character to "/".
    if tarinfo.isreg() and tarinfo.name[-1:] == "/":
        # some old tar programs don't know DIRTYPE
        tarinfo.type = DIRTYPE

    self._record_member(tarinfo)
    return tarinfo
1635
1636 #--------------------------------------------------------------------------
1637 # Below are some methods which are called for special typeflags in the
1638 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1639 # are registered in TYPE_METH below. You can register your own methods
1640 # with this mapping.
1641 # A registered method is called with a TarInfo object as only argument.
1642 #
1643 # During its execution the method MUST perform the following tasks:
1644 # 1. set tarinfo.offset_data to the position where the data blocks begin,
1645 # if there is data to follow.
1646 # 2. set self.offset to the position where the next member's header will
1647 # begin.
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001648 # 3. call self._record_member() if the tarinfo object is supposed to
1649 # appear as a member of the TarFile object.
1650 # 4. return tarinfo or another valid TarInfo object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001651
def proc_gnulong(self, tarinfo):
    """Evaluate the blocks that hold a GNU longname
       or longlink member. The data blocks contain the long name, which
       is applied to the member that follows. That member is returned,
       with its offset set to the offset of the longname/longlink
       header. None is returned if the archive ends before the member
       the long name belongs to.
    """
    # Collect the data blocks holding the long name.
    blocks = []
    count = tarinfo.size
    while count > 0:
        blocks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(blocks)

    # Fetch the next header
    following = self.next()
    if following is None:
        # Truncated archive: there is no member to apply the name to.
        return None

    following.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        following.name = nts(buf)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        following.linkname = nts(buf)

    return following
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001674
1675 def proc_sparse(self, tarinfo):
1676 """Analyze a GNU sparse header plus extra headers.
1677 """
1678 buf = tarinfo.tobuf()
1679 sp = _ringbuffer()
1680 pos = 386
1681 lastpos = 0L
1682 realpos = 0L
1683 # There are 4 possible sparse structs in the
1684 # first header.
1685 for i in xrange(4):
1686 try:
1687 offset = int(buf[pos:pos + 12], 8)
1688 numbytes = int(buf[pos + 12:pos + 24], 8)
1689 except ValueError:
1690 break
1691 if offset > lastpos:
1692 sp.append(_hole(lastpos, offset - lastpos))
1693 sp.append(_data(offset, numbytes, realpos))
1694 realpos += numbytes
1695 lastpos = offset + numbytes
1696 pos += 24
1697
1698 isextended = ord(buf[482])
1699 origsize = int(buf[483:495], 8)
1700
1701 # If the isextended flag is given,
1702 # there are extra headers to process.
1703 while isextended == 1:
1704 buf = self.fileobj.read(BLOCKSIZE)
1705 self.offset += BLOCKSIZE
1706 pos = 0
1707 for i in xrange(21):
1708 try:
1709 offset = int(buf[pos:pos + 12], 8)
1710 numbytes = int(buf[pos + 12:pos + 24], 8)
1711 except ValueError:
1712 break
1713 if offset > lastpos:
1714 sp.append(_hole(lastpos, offset - lastpos))
1715 sp.append(_data(offset, numbytes, realpos))
1716 realpos += numbytes
1717 lastpos = offset + numbytes
1718 pos += 24
1719 isextended = ord(buf[504])
1720
1721 if lastpos < origsize:
1722 sp.append(_hole(lastpos, origsize - lastpos))
1723
1724 tarinfo.sparse = sp
1725
1726 tarinfo.offset_data = self.offset
1727 self.offset += self._block(tarinfo.size)
1728 tarinfo.size = origsize
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001729
1730 self._record_member(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001731 return tarinfo
1732
# Dispatch table of the next() method: the keys are typeflags (single
# character strings), the values are the methods next() calls when it
# reads a header carrying that typeflag.
TYPE_METH = {
    GNUTYPE_LONGNAME: proc_gnulong,
    GNUTYPE_LONGLINK: proc_gnulong,
    GNUTYPE_SPARSE: proc_sparse,
}
1741
1742 #--------------------------------------------------------------------------
1743 # Little helper methods:
1744
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    leftover = count % BLOCKSIZE
    if leftover:
        return count + (BLOCKSIZE - leftover)
    return count
1753
1754 def _getmember(self, name, tarinfo=None):
1755 """Find an archive member by name from bottom to top.
1756 If tarinfo is given, it is used as the starting point.
1757 """
1758 if tarinfo is None:
1759 end = len(self.members)
1760 else:
1761 end = self.members.index(tarinfo)
1762
1763 for i in xrange(end - 1, -1, -1):
1764 if name == self.membernames[i]:
1765 return self.members[i]
1766
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00001767 def _record_member(self, tarinfo):
1768 """Record a tarinfo object in the internal datastructures.
1769 """
1770 self.members.append(tarinfo)
1771 self.membernames.append(tarinfo.name)
1772
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001773 def _load(self):
1774 """Read through the entire archive file and look for readable
1775 members.
1776 """
1777 while True:
1778 tarinfo = self.next()
1779 if tarinfo is None:
1780 break
1781 self._loaded = True
1782
1783 def _check(self, mode=None):
1784 """Check if TarFile is still open, and if the operation's mode
1785 corresponds to TarFile's mode.
1786 """
1787 if self.closed:
1788 raise IOError, "%s is closed" % self.__class__.__name__
1789 if mode is not None and self._mode not in mode:
1790 raise IOError, "bad operation for mode %r" % self._mode
1791
def __iter__(self):
    """Return an iterator over the members: a plain iterator over
       self.members if the archive is completely loaded, else a TarIter
       that reads the members one by one.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
1799
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile. The member
       consists of a header named "././@LongLink" with typeflag `type'
       whose size is the length of `name' including a terminating NUL,
       followed by the NUL-terminated name, padded to a full block.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE
    # write name blocks
    self.fileobj.write(payload)
    nblocks, leftover = divmod(header.size, BLOCKSIZE)
    if leftover > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - leftover))
        nblocks += 1
    self.offset += nblocks * BLOCKSIZE
1824
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    if self.debug >= level:
        print >> sys.stderr, msg
1830# class TarFile
1831
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator that reads from `tarfile'.
        """
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return what the TarFile's next() method returns. Once that
           is a false value, mark the TarFile as _loaded and stop the
           iteration.
        """
        tarinfo = self.tarfile.next()
        if tarinfo:
            return tarinfo
        self.tarfile._loaded = True
        raise StopIteration
1856
1857# Helper classes for sparse file support
1858class _section:
1859 """Base class for _data and _hole.
1860 """
1861 def __init__(self, offset, size):
1862 self.offset = offset
1863 self.size = size
1864 def __contains__(self, offset):
1865 return self.offset <= offset < self.offset + self.size
1866
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is stored
       alongside the offset and size of the section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1873
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       _section and only serves to tell holes from data sections.
    """
1878
1879class _ringbuffer(list):
1880 """Ringbuffer class which increases performance
1881 over a regular list.
1882 """
1883 def __init__(self):
1884 self.idx = 0
1885 def find(self, offset):
1886 idx = self.idx
1887 while True:
1888 item = self[idx]
1889 if offset in item:
1890 break
1891 idx += 1
1892 if idx == len(self):
1893 idx = 0
1894 if idx == self.idx:
1895 # End of File
1896 return None
1897 self.idx = idx
1898 return item
1899
1900#---------------------------------------------
1901# zipfile compatible TarFile class
1902#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED

class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the standard
       module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # `compression' selects the TarFile constructor.
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[:1] == "r":
            # Give every member the attribute names of a ZipInfo.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        return [m.name for m in self.infolist()]

    def infolist(self):
        # Only members of a regular type are listed.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted for compatibility and ignored.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        import StringIO
        import calendar
        # Translate the ZipInfo style attributes to TarInfo ones.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO.StringIO(bytes))

    def close(self):
        self.tarfile.close()
#class TarFileCompat
1948
1949#--------------------
1950# exported functions
1951#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened (and closed again) with this
       module's open(), and False if that raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True

# From here on the module level name open is TarFile.open; it is also
# what is_tarfile() calls.
open = TarFile.open