blob: bd9df9b99d1491ab6df67ff84ae87a384ba69705 [file] [log] [blame]
Fred Drake484d7352000-10-02 21:14:52 +00001"Read and write ZIP files."
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002# Written by James C. Ahlstrom jim@interet.com
3# All rights transferred to CNRI pursuant to the Python contribution agreement
4
5import struct, os, time
Fred Drake484d7352000-10-02 21:14:52 +00006import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00007
8try:
Tim Peterse1190062001-01-15 03:34:38 +00009 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
12
Skip Montanaro40fc1602001-03-01 04:27:19 +000013__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
14 "ZipInfo", "ZipFile", "PyZipFile"]
15
class BadZipfile(Exception):
    """Raised by this module when ZIP data is malformed or unsupported."""


# Alias kept alongside BadZipfile: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# Compression methods understood by this module.  Every other ZIP
# compression method is unsupported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# Signature ("magic number") and struct format of each record type that is
# read or written here.  All formats are little-endian.
stringEndArchive = "PK\005\006"         # end-of-archive record
structEndArchive = "<4s4H2lH"           # 8 items, 22 bytes
stringCentralDir = "PK\001\002"         # central directory entry
structCentralDir = "<4s4B4H3l5H2l"      # 19 items, 46 bytes
stringFileHeader = "PK\003\004"         # local file header
structFileHeader = "<4s2B4H3l2H"        # 12 items, 30 bytes

# Positions of the fields in an unpacked central directory entry.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,            # is this meaningful?
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# Positions of the fields in an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,            # is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The string types; ZipFile compares its "file" argument against these to
# tell a path from a file-like object.
_STRING_TYPES = (type(""), type(u""))
70
Fred Drake484d7352000-10-02 21:14:52 +000071
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Only the last 22 bytes of the file are examined: they must start with
    the end-of-archive signature and end with a zero comment length.
    Will not accept a ZIP archive with an ending comment.

    Returns 1 when the check succeeds.  Returns None otherwise, including
    when the file cannot be opened or is shorter than 22 bytes.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Close even when seek() fails on a too-short file; previously
            # the handle was leaked in that case.
            fpin.close()
    except IOError:
        return None
    if endrec[0:4] == b"PK\005\006" and endrec[-2:] == b"\000\000":
        return 1                        # file has correct magic number
    return None
86
Fred Drake484d7352000-10-02 21:14:52 +000087
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive.

    Attributes assigned by the constructor are listed in __init__.  These
    additional attributes are set by class ZipFile:

        header_offset   Byte offset to the file header
        file_offset     Byte offset to the start of the file data
        CRC             CRC-32 of the uncompressed file
        compress_size   Size of the compressed file
        file_size       Size of the uncompressed file
    """

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename:  name of the file in the archive; it is passed through
                   _normpath() before being stored.
        date_time: sequence (year, month, day, hour, min, sec).
        """
        self.filename = _normpath(filename) # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by the
        file name and the extra data.  When bit 0x08 of flag_bits is set,
        the CRC and both sizes are written as zero; otherwise the CRC,
        compress_size and file_size attributes must already be set.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        # Seconds are stored halved.  Floor division keeps the value an
        # integer even when true division is in effect.
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(self.extra))
        return header + self.filename + self.extra
131
132
# The ZIP format specification requires forward slashes as the directory
# separator, so names stored in generated ZIP files are converted from the
# platform separator.  The variant is chosen once, when the module loads.
if os.sep == "/":
    def _normpath(path):
        """Return path unchanged; the separator is already "/"."""
        return path
else:
    def _normpath(path):
        """Return path with every os.sep turned into "/"."""
        return "/".join(path.split(os.sep))
142
143
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant.  Raises
        RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED without zlib, and for an invalid mode.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        # Validate the mode before any file is opened, so that a bad mode
        # always gives RuntimeError and never leaves a file open.
        key = mode[0:1]
        if key not in ("r", "w", "a"):
            raise RuntimeError('Mode must be "r", "w" or "a"')
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key

        # Check if we were passed a file-like object
        if isinstance(file, _STRING_TYPES):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
                endrec = fp.read()
            except IOError:
                # Shorter than an end-of-archive record, so it cannot be
                # a zip file; treat it like any other file and append.
                endrec = b""
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == b"\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # key == 'w': nothing to read, writing starts at the current position

    def _GetContents(self):
        """Read the directory, making sure we close the file if reading
        fails for any reason (only files opened by ZipFile are closed)."""
        try:
            self._RealGetContents()
        except BaseException:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and sets self.start_dir.
        Raises BadZipfile when a record is missing, truncated or has the
        wrong signature, and RuntimeError when a name in the central
        directory differs from the name in the local file header.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if (len(endrec) != 22 or endrec[0:4] != stringEndArchive
                or endrec[-2:] != b"\000\000"):
            raise BadZipfile(
                  "File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        inferred = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != 46:
                # Report a truncated entry as a bad archive rather than
                # letting struct.unpack fail.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % (total,))
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            if len(fheader) != 30:
                raise BadZipfile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.'
                      % (data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.  The list is the internal one, not a copy."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first file that cannot be read, or None
        when every file reads correctly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # Never report a user interrupt as a corrupt member.
                raise
            except Exception:
                return zinfo.filename
        return None

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member of that name.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is closed, was not opened in
        mode "r" or "a", or needs zlib and zlib is missing.  Raises
        BadZipfile for an unsupported compression method or a CRC
        mismatch.  The position of the underlying file is restored.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        fp = self.fp
        filepos = fp.tell()
        try:
            fp.seek(zinfo.file_offset, 0)
            data = fp.read(zinfo.compress_size)
        finally:
            # Put the position back even on failure; in append mode the
            # next write must continue where it left off.
            fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress(b'Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        # Compare as unsigned 32-bit values so that the result does not
        # depend on whether either CRC is represented as a signed number.
        if (binascii.crc32(data) & 0xffffffff) != (zinfo.CRC & 0xffffffff):
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name only prints a warning, and only when debugging is
        on.  Everything else that is wrong raises RuntimeError.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the compression
        given to the constructor.  The data is copied in 8 KB chunks; the
        CRC and sizes are written after the data (flag bit 0x08).
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        source = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x08
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            CRC = 0
            compress_size = 0
            file_size = 0
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = source.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Close the source file even when reading or writing fails.
            source.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is a ZipInfo instance; its file_size, CRC, compress_size,
        header_offset and file_offset attributes are set here.
        """
        self._writecheck(zinfo)
        data = bytes
        zinfo.file_size = len(data)             # Uncompressed size
        zinfo.CRC = binascii.crc32(data)        # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            data = co.compress(data) + co.flush()
            zinfo.compress_size = len(data)     # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(data)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC,
                  zinfo.compress_size, zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Close a file that ZipFile opened itself, in case the user
        forgot.  The ending records are not written here; only close()
        writes them."""
        if self.fp and not self._filePassed:
            self.fp.close()
            self.fp = None

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        A file-like object passed to the constructor is flushed but left
        open.  Calling close() on an already closed archive does nothing.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):         # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:     # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    centdir = struct.pack(structCentralDir,
                      stringCentralDir, zinfo.create_version,
                      zinfo.create_system, zinfo.extract_version,
                      zinfo.reserved, zinfo.flag_bits,
                      zinfo.compress_type, dostime, dosdate,
                      zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                      len(zinfo.filename), len(zinfo.extra),
                      len(zinfo.comment),
                      0, zinfo.internal_attr, zinfo.external_attr,
                      zinfo.header_offset)
                    self.fp.write(centdir)
                    self.fp.write(zinfo.filename)
                    self.fp.write(zinfo.extra)
                    self.fp.write(zinfo.comment)
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
                self.fp.flush()
        finally:
            # Release the file even when writing the directory failed.
            if not self._filePassed:
                self.fp.close()
            self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000485 self.fp = None
486
487
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename is the archive directory the modules are stored under.
        Raises RuntimeError when pathname is a file not ending in ".py".
        """
        name = os.path.basename(pathname)
        if not name:
            # pathname ended with a separator ("pkg/"); use the directory
            # name itself so the package prefix is not lost.
            name = os.path.basename(os.path.dirname(pathname))
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname, basename, "Adding file")
            return
        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in %s as %s" % (pathname, basename))
            self._write_module(initname, basename)
            # Add all *.py files and package subdirectories
            for filename in os.listdir(pathname):
                if filename == "__init__.py":
                    continue            # already added above
                path = os.path.join(pathname, filename)
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif os.path.splitext(filename)[1] == ".py":
                    self._write_module(path, basename)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    self._write_module(os.path.join(pathname, filename),
                                       basename)

    def _write_module(self, path_py, basename, label="Adding"):
        """Add the compiled form of the source file path_py (a path ending
        in ".py") to the archive under basename."""
        fname, arcname = self._get_codename(path_py[0:-3], basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        not empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        # Index 8 of a stat result is the modification time.
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
                 os.stat(file_pyc)[8] < os.stat(file_py)[8]:
                # The .pyc file is missing or older than the source
                import py_compile
                if self.debug:
                    print("Compiling %s" % (file_py,))
                py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
581 return (fname, archivename)