# Source blob: 0c63b91287070e32fcdd1bfb7714b54e13be9476
"Read and write ZIP files."
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement

import struct
import os
import time
import binascii

# zlib is optional.  When it cannot be imported the name is bound to None
# and only stored (uncompressed) archive members can be read or written.
try:
    import zlib
except ImportError:
    zlib = None

# Names exported by "from zipfile import *"
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
]

class BadZipfile(Exception):
    """Raised when a file cannot be handled as a ZIP archive."""


error = BadZipfile      # The exception raised by this module

# Compression methods understood by this module; archives using any
# other ZIP compression method are rejected.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# Magic numbers that start each kind of record
stringEndArchive = "PK\005\006"     # end of archive record
stringCentralDir = "PK\001\002"     # central directory entry
stringFileHeader = "PK\003\004"     # local file header

# struct module formats for the fixed-size part of each record
# (little-endian, no padding)
structEndArchive = "<4s4H2lH"       # 8 items, end of archive, 22 bytes
structCentralDir = "<4s4B4H3l5H2l"  # 19 items, central directory, 46 bytes
structFileHeader = "<4s2B4H3l2H"    # 12 items, file header record, 30 bytes

Fred Drake3e038e52001-02-28 17:56:26 +000033# indexes of entries in the central directory structure
34_CD_SIGNATURE = 0
35_CD_CREATE_VERSION = 1
36_CD_CREATE_SYSTEM = 2
37_CD_EXTRACT_VERSION = 3
38_CD_EXTRACT_SYSTEM = 4 # is this meaningful?
39_CD_FLAG_BITS = 5
40_CD_COMPRESS_TYPE = 6
41_CD_TIME = 7
42_CD_DATE = 8
43_CD_CRC = 9
44_CD_COMPRESSED_SIZE = 10
45_CD_UNCOMPRESSED_SIZE = 11
46_CD_FILENAME_LENGTH = 12
47_CD_EXTRA_FIELD_LENGTH = 13
48_CD_COMMENT_LENGTH = 14
49_CD_DISK_NUMBER_START = 15
50_CD_INTERNAL_FILE_ATTRIBUTES = 16
51_CD_EXTERNAL_FILE_ATTRIBUTES = 17
52_CD_LOCAL_HEADER_OFFSET = 18
53
54# indexes of entries in the local file header structure
55_FH_SIGNATURE = 0
56_FH_EXTRACT_VERSION = 1
57_FH_EXTRACT_SYSTEM = 2 # is this meaningful?
58_FH_GENERAL_PURPOSE_FLAG_BITS = 3
59_FH_COMPRESSION_METHOD = 4
60_FH_LAST_MOD_TIME = 5
61_FH_LAST_MOD_DATE = 6
62_FH_CRC = 7
63_FH_COMPRESSED_SIZE = 8
64_FH_UNCOMPRESSED_SIZE = 9
65_FH_FILENAME_LENGTH = 10
66_FH_EXTRA_FIELD_LENGTH = 11
67
Fred Drake3d9091e2001-03-26 15:49:24 +000068# Used to compare file passed to ZipFile
69_STRING_TYPES = (type('s'), type(u's'))
70
Fred Drake484d7352000-10-02 21:14:52 +000071
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Only the last 22 bytes of the file are inspected: they must start
    with the end-of-archive magic number and end with a zero comment
    length.  Will not accept a ZIP archive with an ending comment.

    filename: path of the file to test.

    Returns 1 when the file looks like a ZIP archive, otherwise None
    (also when the file cannot be opened or is shorter than 22 bytes).
    """
    try:
        fpin = open(filename, "rb")
    except IOError:
        return None
    try:
        try:
            fpin.seek(-22, 2)       # Seek to end-of-file record
            endrec = fpin.read()
        except IOError:
            # Typically a file too short to hold an end-of-archive record
            return None
    finally:
        # Always release the handle, including on the error path above
        fpin.close()
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                    # file has correct magic number
    return None


class ZipInfo:
    """Record holding the attributes of one member of a ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a record with default header values.

        filename:  name of the member; stored with "/" as separator.
        date_time: (year, month, day, hour, min, sec) of last modification.
        """
        self.filename = _normpath(filename) # Member name inside the archive
        self.date_time = date_time          # year, month, day, hour, min, sec
        # Defaults for everything else:
        self.compress_type = ZIP_STORED     # How the member data is compressed
        self.comment = ""                   # Per-member comment
        self.extra = ""                     # ZIP extra data
        self.create_system = 0              # System which created ZIP archive
        self.create_version = 20            # Version which created ZIP archive
        self.extract_version = 20           # Version needed to extract archive
        self.reserved = 0                   # Must be zero
        self.flag_bits = 0                  # ZIP flag bits
        self.volume = 0                     # Volume number of file header
        self.internal_attr = 0              # Internal attributes
        self.external_attr = 0              # External file attributes
        # ZipFile fills in these further attributes:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the local file header for this member as a string.

        The result is the packed fixed-size header followed by the file
        name and the extra data.
        """
        stamp = self.date_time
        # DOS date: 7 bits of years since 1980, 4 bits month, 5 bits day
        dos_date = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        # DOS time: 5 bits hour, 6 bits minute, 5 bits of seconds halved
        dos_time = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] >> 1)
        if self.flag_bits & 0x08:
            # These are written after the file data, so the header
            # itself carries zeros
            crc = packed_size = raw_size = 0
        else:
            crc = self.CRC
            packed_size = self.compress_size
            raw_size = self.file_size
        fixed_part = struct.pack(structFileHeader, stringFileHeader,
                                 self.extract_version, self.reserved,
                                 self.flag_bits, self.compress_type,
                                 dos_time, dos_date, crc,
                                 packed_size, raw_size,
                                 len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra


Fred Drakea58947f2001-07-19 19:44:25 +0000133# This is used to ensure paths in generated ZIP files always use
134# forward slashes as the directory separator, as required by the
135# ZIP format specification.
136if os.sep != "/":
137 def _normpath(path):
138 return path.replace(os.sep, "/")
139else:
140 def _normpath(path):
141 return path
142
143
class ZipFile:
    """Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first letter of mode is significant.  Raises RuntimeError
        for an unsupported compression method or mode, and BadZipfile when
        an archive opened for reading cannot be parsed.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        # Reject a bad mode before any file is opened, so the caller gets
        # the documented RuntimeError and no handle is left behind.
        key = mode[:1]
        if key not in ("r", "w", "a"):
            raise RuntimeError('Mode must be "r", "w" or "a"')
        self.debug = 0          # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key

        # Check if we were passed a file-like object
        if type(file) in _STRING_TYPES:
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
            except IOError:
                # Too short to hold an end-of-archive record, so it is
                # not a zip file
                endrec = ""
            else:
                endrec = fp.read()
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)

    def _GetContents(self):
        """Read the directory, making sure we close the file if reading
        fails.

        Whatever exception _RealGetContents() raises is passed on to the
        caller; a file opened by this class is closed first.
        """
        succeeded = 0
        try:
            self._RealGetContents()
            succeeded = 1
        finally:
            if not succeeded and not self._filePassed:
                self.fp.close()
                self.fp = None

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and sets self.start_dir.
        Raises BadZipfile for a bad magic number and RuntimeError when the
        name in a local header differs from the central directory.
        """
        fp = self.fp
        fp.seek(-22, 2)             # Start of end-of-archive record
        filesize = fp.tell() + 22   # Get file size
        endrec = fp.read(22)        # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        inferred_cd = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that cannot be read
        correctly, or None when every member is fine.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # A request to stop is not a defect of the archive
                raise
            except Exception:
                return zinfo.filename
        return None

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member of that name.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is not open for reading,
        KeyError for an unknown name, and BadZipfile for an unsupported
        compression method or a CRC mismatch.  The position of the
        underlying file is restored before returning.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name only produces a warning (when self.debug is
        set); every other problem raises RuntimeError.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the method
        chosen when the archive was opened.  The header is first written
        with zero CRC and sizes and patched once the data has been
        copied, so the archive file must support seeking backwards.
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Release the source file even when copying fails part way
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is the ZipInfo instance describing the new member; its
        size, CRC and offset attributes are filled in here.
        """
        self._writecheck(zinfo)
        data = bytes
        zinfo.file_size = len(data)             # Uncompressed size
        zinfo.CRC = binascii.crc32(data)        # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                  zlib.DEFLATED, -15)
            data = co.compress(data) + co.flush()
            zinfo.compress_size = len(data)     # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(data)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Close the underlying file in case the user forgot.

        Only a file opened by this class is closed.  No ending records
        are written here; call close() explicitly to finish an archive
        opened with mode "w" or "a".
        """
        if self.fp and not self._filePassed:
            self.fp.close()
            self.fp = None

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an archive that is already closed does
        nothing.  A file-like object passed in by the caller is flushed
        but left open.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):         # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:     # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    # seconds are stored halved (two second resolution)
                    dostime = dt[3] << 11 | dt[4] << 5 | dt[5] >> 1
                    centdir = struct.pack(structCentralDir,
                      stringCentralDir, zinfo.create_version,
                      zinfo.create_system, zinfo.extract_version,
                      zinfo.reserved, zinfo.flag_bits, zinfo.compress_type,
                      dostime, dosdate, zinfo.CRC, zinfo.compress_size,
                      zinfo.file_size, len(zinfo.filename),
                      len(zinfo.extra), len(zinfo.comment),
                      0, zinfo.internal_attr, zinfo.external_attr,
                      zinfo.header_offset)
                    self.fp.write(centdir)
                    self.fp.write(zinfo.filename)
                    self.fp.write(zinfo.extra)
                    self.fp.write(zinfo.comment)
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
                self.fp.flush()
        finally:
            # Give up the file even if the directory could not be written
            if not self._filePassed:
                self.fp.close()
            self.fp = None


class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename, when given, is prefixed (with "/") to the archive
        names.  Raises RuntimeError for a plain file whose name does not
        end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._write_module(initname, basename)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif os.path.splitext(filename)[1] == ".py":
                        self._write_module(path, basename)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    if os.path.splitext(filename)[1] == ".py":
                        self._write_module(os.path.join(pathname, filename),
                                           basename)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname, basename, "Adding file")

    def _write_module(self, file_py, basename, label="Adding"):
        """Add the compiled form of the module source file_py.

        file_py must end with ".py".  label is the text printed in front
        of the archive name when self.debug is set.
        """
        fname, arcname = self._get_codename(file_py[0:-3], basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with basename and "/" when basename
        is not empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
                  os.stat(file_pyc)[8] < os.stat(file_py)[8]:
                # The .pyc is missing or older than the source
                import py_compile
                if self.debug:
                    print("Compiling %s" % file_py)
                py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)