blob: 0efcad3a8a4f38fd90bbb5164b5e201df717d5c0 [file] [log] [blame]
Fred Drake484d7352000-10-02 21:14:52 +00001"Read and write ZIP files."
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002# Written by James C. Ahlstrom jim@interet.com
3# All rights transferred to CNRI pursuant to the Python contribution agreement
4
5import struct, os, time
Fred Drake484d7352000-10-02 21:14:52 +00006import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00007
8try:
Tim Peterse1190062001-01-15 03:34:38 +00009 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
12
# Public names of this module (what "from <module> import *" exports).
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]

class BadZipfile(Exception):
    """Raised when an archive is malformed or a member cannot be decoded.

    The code in this module raises it for a missing or unexpected record
    signature, an unsupported compression method on read, and a CRC-32
    mismatch.
    """
    pass
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# constants for Zip file compression methods
# (the value stored in the compression-method field of the ZIP headers)
ZIP_STORED = 0          # member data is written uncompressed
ZIP_DEFLATED = 8        # member data is deflated; requires the zlib module
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
# ("<" selects little-endian, standard-size fields).  Each "string..."
# constant is the 4-byte signature that begins the matching record.
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4H3l2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
32
# indexes of entries in the central directory structure
# (positions in the 19-item tuple produced by unpacking one record with
# structCentralDir).  NOTE: ZipFile._RealGetContents also slices that tuple
# with literal bounds ([1:12] and [15:18]), so these values must stay in
# step with both the format string and those slices.
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
53
# indexes of entries in the local file header structure
# (positions in the 12-item tuple produced by unpacking a 30-byte local
# header with structFileHeader)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
67
# Used to compare file passed to ZipFile
# (ZipFile.__init__ treats an argument whose type is in this tuple as a
# path to open; any other argument is used as an already-open file object)
import types
_STRING_TYPES = (types.StringType,)
# Include the unicode string type only when the types module provides it.
if hasattr(types, "UnicodeType"):
    _STRING_TYPES = _STRING_TYPES + (types.UnicodeType,)
Fred Drake3d9091e2001-03-26 15:49:24 +000073
Fred Drake484d7352000-10-02 21:14:52 +000074
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Only the last 22 bytes of the file are examined: they must start with
    the end-of-archive signature and end with a zero comment length.

    Will not accept a ZIP archive with an ending comment.

    filename: path of the file to test.

    Returns 1 if the file looks like a ZIP archive, otherwise 0.  A file
    that cannot be opened or is shorter than 22 bytes yields 0 rather than
    an exception.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Close even when the seek fails (file shorter than 22 bytes);
            # otherwise the handle would be leaked.
            fpin.close()
    except IOError:
        return 0
    # "PK\005\006" is the end-of-archive signature (same value as the
    # module-level stringEndArchive constant).
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                        # file has correct magic number
    return 0
89
Fred Drake484d7352000-10-02 21:14:52 +000090
class ZipInfo:
    """Record holding the attributes of one member of a ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member description with standard default values.

        filename:  name of the member; passed through _normpath() before
                   being stored.
        date_time: sequence (year, month, day, hour, min, sec).
        """
        # What the member is called and when it was last modified
        self.filename = _normpath(filename)
        self.date_time = date_time

        # Standard values
        self.compress_type = ZIP_STORED     # how the member data is stored
        self.comment = ""                   # per-member comment
        self.extra = ""                     # ZIP extra data
        self.create_system = 0              # system that created the archive
        self.create_version = 20            # version that created the archive
        self.extract_version = 20           # version needed for extraction
        self.reserved = 0                   # must be zero
        self.flag_bits = 0                  # ZIP flag bits
        self.volume = 0                     # volume number of file header
        self.internal_attr = 0              # internal attributes
        self.external_attr = 0              # external file attributes

        # Attributes filled in later by class ZipFile:
        #   header_offset   byte offset of the file header
        #   file_offset     byte offset of the start of the file data
        #   CRC             CRC-32 of the uncompressed file
        #   compress_size   size of the compressed file
        #   file_size       size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local header followed by the
        file name and the extra data.
        """
        stamp = self.date_time
        # Pack the date and the time into the two 16-bit DOS fields; the
        # seconds are stored halved.
        packed_date = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        packed_time = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            # These are written after the file data, so the header
            # carries zeros for them.
            crc, stored_size, plain_size = 0, 0, 0
        else:
            crc, stored_size, plain_size = (self.CRC, self.compress_size,
                                            self.file_size)
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, packed_time, packed_date, crc,
            stored_size, plain_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
134
135
# The ZIP format specification requires forward slashes as the directory
# separator, so paths stored in generated ZIP files are converted from the
# platform's separator when that separator is something else.
def _normpath(path):
    """Return path with every os.sep replaced by "/"."""
    if os.sep == "/":
        # Already in archive form on this platform.
        return path
    return path.replace(os.sep, "/")
145
146
class ZipFile:
    """Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant.

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED when zlib is unavailable, and for an unknown mode.
        Raises BadZipfile when an archive opened for reading is malformed.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0          # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        # mode[:1] rather than mode[0] so an empty mode is reported as a
        # bad mode instead of an IndexError.
        self.mode = key = mode[:1]

        # Validate the mode before touching the file, and look the open()
        # mode up by the significant first character, so a bad mode is
        # always reported as the RuntimeError below (never a KeyError).
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        if key not in modeDict:
            raise RuntimeError('Mode must be "r", "w" or "a"')

        # Check if we were passed a file-like object
        if isinstance(file, _STRING_TYPES):
            self._filePassed = 0
            self.filename = file
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
                endrec = fp.read()
            except IOError:
                # Too short to hold an end-of-archive record, so it cannot
                # be a ZIP file: fall through to plain appending.
                endrec = ""
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # key == 'w': nothing to read, writing starts at the current position

    def _GetContents(self):
        """Read the directory, making sure we close the file if reading
        fails.

        The exception is always re-raised.  Cleanup happens for every
        error, not just BadZipfile: a still-open archive in append mode
        would otherwise get a bogus central directory written into it when
        __del__ calls close().
        """
        try:
            self._RealGetContents()
        except:
            if not self._filePassed:
                self.fp.close()
            self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and sets self.start_dir.
        Raises BadZipfile on a bad signature or truncated record, and
        RuntimeError when the central directory and a local header
        disagree about a file name.
        """
        fp = self.fp
        fp.seek(-22, 2)                 # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile(
                  "File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]           # offset of central directory
        # Where the directory must start, judging by the actual file size
        inferred = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != 46:
                # Report a short read as a bad archive, not a struct.error
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            if len(fheader) != 30:
                raise BadZipfile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.'
                      % (data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.  (This is the internal list itself, not a copy.)"""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that cannot be read, or None
        when every member reads back cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # A request to stop is not evidence of a bad member
                raise
            except:
                # Any other failure (bad CRC, corrupt deflate stream, ...)
                # marks this member as bad.
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no such member.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is closed or was opened in
        mode "w", KeyError for an unknown name, and BadZipfile for an
        unsupported compression method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        try:
            self.fp.seek(zinfo.file_offset, 0)
            data = self.fp.read(zinfo.compress_size)
        finally:
            # Put the position back even on failure: in append mode it is
            # where the next member will be written.
            self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s"
                  % (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name only produces a warning (when self.debug is set);
        every other problem raises RuntimeError.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError("That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename; compress_type defaults to the
        compression chosen when the archive was opened.  The data is
        streamed in 8 KB pieces, and the CRC and sizes in the local header
        are patched afterwards, so the archive file must be seekable.
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Never leak the source file, whatever goes wrong above
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        # (they sit 14 bytes into the local header)
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is a ZipInfo instance; its compress_type selects the
        compression and its size, CRC and offset attributes are filled in
        here.
        """
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() again is harmless.  A file object supplied by the
        caller is flushed but left open.  The archive is marked closed even
        if writing the ending records fails, so the failed write is not
        repeated from __del__.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):             # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    centdir = struct.pack(structCentralDir,
                      stringCentralDir, zinfo.create_version,
                      zinfo.create_system, zinfo.extract_version,
                      zinfo.reserved, zinfo.flag_bits, zinfo.compress_type,
                      dostime, dosdate,
                      zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                      len(zinfo.filename), len(zinfo.extra),
                      len(zinfo.comment),
                      0, zinfo.internal_attr, zinfo.external_attr,
                      zinfo.header_offset)
                    self.fp.write(centdir)
                    self.fp.write(zinfo.filename)
                    self.fp.write(zinfo.extra)
                    self.fp.write(zinfo.comment)
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
493
494
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename is the archive directory under which the entries are
        stored ("" for the top level).

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                head, name = os.path.split(pathname)
                if not name:
                    # pathname ended with a separator ("pkg/"); the package
                    # name is then the last component of the head.  Without
                    # this the package would be archived with an empty name,
                    # i.e. its modules would land at the top level.
                    name = os.path.basename(head)
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s"
                          % (pathname, basename))
                self._add_module(initname[0:-3], basename, "Adding")
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")   # already added above
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._add_module(path[0:-3], basename, "Adding")
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        self._add_module(path[0:-3], basename, "Adding")
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._add_module(pathname[0:-3], basename, "Adding file")

    def _add_module(self, pathname, basename, label):
        """Archive the compiled form of the module pathname + ".py".

        label is the word(s) printed before the archive name when
        self.debug is set.
        """
        fname, arcname = self._get_codename(pathname, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (no extension), return the path of the
        compiled file to store and the name to store it under, compiling
        if necessary.  For example, given /python/lib/string and an empty
        basename, return (/python/lib/string.pyc, string.pyc).

        An existing .pyo at least as new as the .py is preferred; otherwise
        the .pyc is used, after (re)compiling it when it is missing or
        older than the .py.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        # Index 8 of the stat tuple is the modification time
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
             os.stat(file_pyc)[8] < os.stat(file_py)[8]:
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)