blob: 93436cf8b350f3138fbe67f1fbde8a853c16c0ae [file] [log] [blame]
Fred Drake484d7352000-10-02 21:14:52 +00001"Read and write ZIP files."
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002
3import struct, os, time
Fred Drake484d7352000-10-02 21:14:52 +00004import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00005
6try:
Tim Peterse1190062001-01-15 03:34:38 +00007 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00008except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00009 zlib = None
10
Skip Montanaro40fc1602001-03-01 04:27:19 +000011__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
12 "ZipInfo", "ZipFile", "PyZipFile"]
13
class BadZipfile(Exception):
    """Raised by this module when an archive is malformed or fails a check."""


# Historical alias: "error" names the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# Compression methods understood by this module (other ZIP methods are
# not supported).
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats and 4-byte magic numbers for the three record types.
# All formats are little-endian ("<") with standard sizes.

# End of archive record: 8 packed fields, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 packed fields, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = "PK\001\002"

# Local file header: 12 packed fields, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = "PK\003\004"

# Positions of the fields in an unpacked central directory entry.
(_CD_SIGNATURE,                     #  0
 _CD_CREATE_VERSION,                #  1
 _CD_CREATE_SYSTEM,                 #  2
 _CD_EXTRACT_VERSION,               #  3
 _CD_EXTRACT_SYSTEM,                #  4  is this meaningful?
 _CD_FLAG_BITS,                     #  5
 _CD_COMPRESS_TYPE,                 #  6
 _CD_TIME,                          #  7
 _CD_DATE,                          #  8
 _CD_CRC,                           #  9
 _CD_COMPRESSED_SIZE,               # 10
 _CD_UNCOMPRESSED_SIZE,             # 11
 _CD_FILENAME_LENGTH,               # 12
 _CD_EXTRA_FIELD_LENGTH,            # 13
 _CD_COMMENT_LENGTH,                # 14
 _CD_DISK_NUMBER_START,             # 15
 _CD_INTERNAL_FILE_ATTRIBUTES,      # 16
 _CD_EXTERNAL_FILE_ATTRIBUTES,      # 17
 _CD_LOCAL_HEADER_OFFSET,           # 18
 ) = range(19)

# Positions of the fields in an unpacked local file header.
(_FH_SIGNATURE,                     #  0
 _FH_EXTRACT_VERSION,               #  1
 _FH_EXTRACT_SYSTEM,                #  2  is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,     #  3
 _FH_COMPRESSION_METHOD,            #  4
 _FH_LAST_MOD_TIME,                 #  5
 _FH_LAST_MOD_DATE,                 #  6
 _FH_CRC,                           #  7
 _FH_COMPRESSED_SIZE,               #  8
 _FH_UNCOMPRESSED_SIZE,             #  9
 _FH_FILENAME_LENGTH,               # 10
 _FH_EXTRA_FIELD_LENGTH,            # 11
 ) = range(12)
65
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record is found by _EndRecData(), and
    False when none is found or when the file cannot be opened or read
    (IOError).
    """
    try:
        fpin = open(filename, "rb")
        # Nested try/finally so the handle is released even when the
        # probe itself raises IOError.
        try:
            endrec = _EndRecData(fpin)
        finally:
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000077
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable file object opened for binary reading.

    On success the result is a list of ten items: the eight fields
    unpacked with structEndArchive, followed by the archive comment
    (index 8) and the file offset at which the record starts (index 9).
    None is returned when no valid record can be located, including
    when the file is too short to hold one.
    """
    record_size = struct.calcsize(structEndArchive)     # 22 bytes

    # Find the file size first; a file shorter than one record cannot be
    # an archive, and seeking before its start would raise IOError.
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < record_size:
        return None

    # Fast path: assume no archive comment, so the record is exactly the
    # last 22 bytes and its comment-length field is zero.
    fpin.seek(filesize - record_size, 0)
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                       # Append the archive comment
        endrec.append(filesize - record_size)   # Append the record start offset
        return endrec

    # Slow path: the archive has a comment appended after the record.
    # The comment length is a 16 bit field, so the record can start at
    # most 65535 + 22 bytes before the end of the file.  The signature
    # must not appear inside the comment, since the last match is used.
    search_start = max(filesize - (record_size + 0xFFFF), 0)
    fpin.seek(search_start, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        return None                 # No signature found
    record = data[start:start + record_size]
    if len(record) != record_size:
        return None                 # Signature too close to end of file
    endrec = list(struct.unpack(structEndArchive, record))
    comment = data[start + record_size:]
    if endrec[7] != len(comment):
        return None                 # Comment length does not check out
    # Append the archive comment and start offset
    endrec.append(comment)
    endrec.append(search_start + start)
    return endrec
111
Fred Drake484d7352000-10-02 21:14:52 +0000112
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Record the member name and timestamp and set default metadata.

        filename is kept verbatim in orig_filename; the normalized form
        is stored in filename.  date_time is a sequence starting with
        (year, month, day, hour, min, sec).
        """
        self.orig_filename = filename   # Original file name in archive

        # Keep only the part before the first null byte.  Null bytes in
        # file names are used as tricks by viruses in archives.
        normalized = filename.split(chr(0), 1)[0]
        # Paths in generated ZIP files always use forward slashes as the
        # directory separator, as required by the ZIP format
        # specification.
        if os.sep != "/":
            normalized = normalized.replace(os.sep, "/")
        self.filename = normalized      # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the file
        name and the extra data.
        """
        stamp = self.date_time
        # Pack the timestamp into the 16 bit DOS date and time fields;
        # seconds are stored with two-second resolution.
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            crc, packed_size, plain_size = 0, 0, 0
        else:
            crc, packed_size, plain_size = (
                self.CRC, self.compress_size, self.file_size)
        fixed_part = struct.pack(
            structFileHeader,
            stringFileHeader,
            self.extract_version,
            self.reserved,
            self.flag_bits,
            self.compress_type,
            dostime,
            dosdate,
            crc,
            packed_size,
            plain_size,
            len(self.filename),
            len(self.extra))
        return fixed_part + self.filename + self.extra
167
168
169class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000170 """ Class with methods to open, read, write, close, list zip files.
171
Fred Drake3d9091e2001-03-26 15:49:24 +0000172 z = ZipFile(file, mode="r", compression=ZIP_STORED)
Tim Petersa19a1682001-03-29 04:36:09 +0000173
Fred Drake3d9091e2001-03-26 15:49:24 +0000174 file: Either the path to the file, or a file-like object.
175 If it is a path, the file will be opened and closed by ZipFile.
176 mode: The mode can be either read "r", write "w" or append "a".
177 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
178 """
Fred Drake484d7352000-10-02 21:14:52 +0000179
Fred Drake90eac282001-02-28 05:29:34 +0000180 fp = None # Set here since __del__ checks it
181
Fred Drake3d9091e2001-03-26 15:49:24 +0000182 def __init__(self, file, mode="r", compression=ZIP_STORED):
Fred Drake484d7352000-10-02 21:14:52 +0000183 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000184 if compression == ZIP_STORED:
185 pass
186 elif compression == ZIP_DEFLATED:
187 if not zlib:
188 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000189 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000190 else:
191 raise RuntimeError, "That compression method is not supported"
Tim Peterse1190062001-01-15 03:34:38 +0000192 self.debug = 0 # Level of printing: 0 through 3
193 self.NameToInfo = {} # Find file info given name
194 self.filelist = [] # List of ZipInfo instances for archive
195 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000196 self.mode = key = mode.replace('b', '')[0]
Tim Petersa19a1682001-03-29 04:36:09 +0000197
Fred Drake3d9091e2001-03-26 15:49:24 +0000198 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000199 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000200 self._filePassed = 0
201 self.filename = file
202 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
203 self.fp = open(file, modeDict[mode])
204 else:
205 self._filePassed = 1
206 self.fp = file
207 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000208
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000209 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000210 self._GetContents()
211 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000212 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000213 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000214 try: # See if file is a zip file
215 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000216 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000217 self.fp.seek(self.start_dir, 0)
218 except BadZipfile: # file is not a zip file, just append
219 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000220 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000221 if not self._filePassed:
222 self.fp.close()
223 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000224 raise RuntimeError, 'Mode must be "r", "w" or "a"'
225
226 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000227 """Read the directory, making sure we close the file if the format
228 is bad."""
229 try:
230 self._RealGetContents()
231 except BadZipfile:
232 if not self._filePassed:
233 self.fp.close()
234 self.fp = None
235 raise
236
237 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000238 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000239 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000240 endrec = _EndRecData(fp)
241 if not endrec:
242 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000243 if self.debug > 1:
244 print endrec
Tim Peterse1190062001-01-15 03:34:38 +0000245 size_cd = endrec[5] # bytes in central directory
246 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000247 self.comment = endrec[8] # archive comment
248 # endrec[9] is the offset of the "End of Central Dir" record
249 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000250 # "concat" is zero, unless zip was concatenated to another file
251 concat = x - offset_cd
252 if self.debug > 2:
253 print "given, inferred, offset", offset_cd, x, concat
254 # self.start_dir: Position of start of central directory
255 self.start_dir = offset_cd + concat
256 fp.seek(self.start_dir, 0)
257 total = 0
258 while total < size_cd:
259 centdir = fp.read(46)
260 total = total + 46
261 if centdir[0:4] != stringCentralDir:
262 raise BadZipfile, "Bad magic number for central directory"
263 centdir = struct.unpack(structCentralDir, centdir)
264 if self.debug > 2:
265 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000266 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000267 # Create ZipInfo instance to store file information
268 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000269 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
270 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
271 total = (total + centdir[_CD_FILENAME_LENGTH]
272 + centdir[_CD_EXTRA_FIELD_LENGTH]
273 + centdir[_CD_COMMENT_LENGTH])
274 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
275 # file_offset must be computed below...
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000276 (x.create_version, x.create_system, x.extract_version, x.reserved,
277 x.flag_bits, x.compress_type, t, d,
278 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
279 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
280 # Convert date/time code to (year, month, day, hour, min, sec)
281 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000282 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000283 self.filelist.append(x)
284 self.NameToInfo[x.filename] = x
285 if self.debug > 2:
286 print "total", total
287 for data in self.filelist:
288 fp.seek(data.header_offset, 0)
289 fheader = fp.read(30)
290 if fheader[0:4] != stringFileHeader:
291 raise BadZipfile, "Bad magic number for file header"
292 fheader = struct.unpack(structFileHeader, fheader)
Fred Drake3e038e52001-02-28 17:56:26 +0000293 # file_offset is computed here, since the extra field for
294 # the central directory and for the local file header
295 # refer to different fields, and they can have different
296 # lengths
297 data.file_offset = (data.header_offset + 30
298 + fheader[_FH_FILENAME_LENGTH]
299 + fheader[_FH_EXTRA_FIELD_LENGTH])
300 fname = fp.read(fheader[_FH_FILENAME_LENGTH])
Greg Ward8e36d282003-06-18 00:53:06 +0000301 if fname != data.orig_filename:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000302 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000303 'File name in directory "%s" and header "%s" differ.' % (
Greg Ward8e36d282003-06-18 00:53:06 +0000304 data.orig_filename, fname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000305
306 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000307 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000308 l = []
309 for data in self.filelist:
310 l.append(data.filename)
311 return l
312
313 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000314 """Return a list of class ZipInfo instances for files in the
315 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000316 return self.filelist
317
318 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000319 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000320 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
321 for zinfo in self.filelist:
322 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
323 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
324
325 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000326 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000327 for zinfo in self.filelist:
328 try:
Tim Peterse1190062001-01-15 03:34:38 +0000329 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000330 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000331 return zinfo.filename
332
333 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000334 """Return the instance of ZipInfo given 'name'."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000335 return self.NameToInfo[name]
336
337 def read(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000338 """Return file bytes (as a string) for name."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000339 if self.mode not in ("r", "a"):
340 raise RuntimeError, 'read() requires mode "r" or "a"'
341 if not self.fp:
342 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000343 "Attempt to read ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000344 zinfo = self.getinfo(name)
345 filepos = self.fp.tell()
346 self.fp.seek(zinfo.file_offset, 0)
347 bytes = self.fp.read(zinfo.compress_size)
348 self.fp.seek(filepos, 0)
349 if zinfo.compress_type == ZIP_STORED:
350 pass
351 elif zinfo.compress_type == ZIP_DEFLATED:
352 if not zlib:
353 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000354 "De-compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000355 # zlib compress/decompress code by Jeremy Hylton of CNRI
356 dc = zlib.decompressobj(-15)
357 bytes = dc.decompress(bytes)
358 # need to feed in unused pad byte so that zlib won't choke
359 ex = dc.decompress('Z') + dc.flush()
360 if ex:
361 bytes = bytes + ex
362 else:
363 raise BadZipfile, \
Fred Drake5db246d2000-09-29 20:44:48 +0000364 "Unsupported compression method %d for file %s" % \
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000365 (zinfo.compress_type, name)
366 crc = binascii.crc32(bytes)
367 if crc != zinfo.CRC:
368 raise BadZipfile, "Bad CRC-32 for file %s" % name
369 return bytes
370
371 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000372 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000373 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000374 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000375 print "Duplicate name:", zinfo.filename
376 if self.mode not in ("w", "a"):
377 raise RuntimeError, 'write() requires mode "w" or "a"'
378 if not self.fp:
379 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000380 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000381 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
382 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000383 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000384 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
385 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000386 "That compression method is not supported"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000387
388 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000389 """Put the bytes from filename into the archive under the name
390 arcname."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000391 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000392 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000393 date_time = mtime[0:6]
394 # Create ZipInfo instance to store file information
395 if arcname is None:
Tim Peterse1190062001-01-15 03:34:38 +0000396 zinfo = ZipInfo(filename, date_time)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000397 else:
Tim Peterse1190062001-01-15 03:34:38 +0000398 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +0000399 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000400 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000401 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000402 else:
Tim Peterse1190062001-01-15 03:34:38 +0000403 zinfo.compress_type = compress_type
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404 self._writecheck(zinfo)
405 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000406 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000407 zinfo.header_offset = self.fp.tell() # Start of header bytes
Finn Bock03a3bb82001-09-05 18:40:33 +0000408 # Must overwrite CRC and sizes with correct data later
409 zinfo.CRC = CRC = 0
410 zinfo.compress_size = compress_size = 0
411 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000412 self.fp.write(zinfo.FileHeader())
Tim Peterse1190062001-01-15 03:34:38 +0000413 zinfo.file_offset = self.fp.tell() # Start of file bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000414 if zinfo.compress_type == ZIP_DEFLATED:
415 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
416 zlib.DEFLATED, -15)
417 else:
418 cmpr = None
419 while 1:
420 buf = fp.read(1024 * 8)
421 if not buf:
422 break
423 file_size = file_size + len(buf)
424 CRC = binascii.crc32(buf, CRC)
425 if cmpr:
426 buf = cmpr.compress(buf)
427 compress_size = compress_size + len(buf)
428 self.fp.write(buf)
429 fp.close()
430 if cmpr:
431 buf = cmpr.flush()
432 compress_size = compress_size + len(buf)
433 self.fp.write(buf)
434 zinfo.compress_size = compress_size
435 else:
436 zinfo.compress_size = file_size
437 zinfo.CRC = CRC
438 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000439 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000440 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000441 self.fp.seek(zinfo.header_offset + 14, 0)
Brett Cannonff450f72004-07-10 19:09:20 +0000442 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000443 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000444 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000445 self.filelist.append(zinfo)
446 self.NameToInfo[zinfo.filename] = zinfo
447
Just van Rossumb083cb32002-12-12 12:23:32 +0000448 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +0000449 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +0000450 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
451 the name of the file in the archive."""
452 if not isinstance(zinfo_or_arcname, ZipInfo):
453 zinfo = ZipInfo(filename=zinfo_or_arcname,
454 date_time=time.localtime(time.time()))
455 zinfo.compress_type = self.compression
456 else:
457 zinfo = zinfo_or_arcname
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000458 self._writecheck(zinfo)
Tim Peterse1190062001-01-15 03:34:38 +0000459 zinfo.file_size = len(bytes) # Uncompressed size
460 zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000461 if zinfo.compress_type == ZIP_DEFLATED:
462 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
463 zlib.DEFLATED, -15)
464 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +0000465 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000466 else:
467 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +0000468 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000469 self.fp.write(zinfo.FileHeader())
Tim Peterse1190062001-01-15 03:34:38 +0000470 zinfo.file_offset = self.fp.tell() # Start of file bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000471 self.fp.write(bytes)
472 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000473 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +0000474 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +0000475 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000476 self.filelist.append(zinfo)
477 self.NameToInfo[zinfo.filename] = zinfo
478
479 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +0000480 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000481 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000482
483 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +0000484 """Close the file, and for mode "w" and "a" write the ending
485 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000486 if self.fp is None:
487 return
Tim Peterse1190062001-01-15 03:34:38 +0000488 if self.mode in ("w", "a"): # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000489 count = 0
490 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +0000491 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000492 count = count + 1
493 dt = zinfo.date_time
494 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000495 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000496 centdir = struct.pack(structCentralDir,
497 stringCentralDir, zinfo.create_version,
498 zinfo.create_system, zinfo.extract_version, zinfo.reserved,
499 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
500 zinfo.CRC, zinfo.compress_size, zinfo.file_size,
501 len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
502 0, zinfo.internal_attr, zinfo.external_attr,
503 zinfo.header_offset)
504 self.fp.write(centdir)
505 self.fp.write(zinfo.filename)
506 self.fp.write(zinfo.extra)
507 self.fp.write(zinfo.comment)
508 pos2 = self.fp.tell()
509 # Write end-of-zip-archive record
510 endrec = struct.pack(structEndArchive, stringEndArchive,
511 0, 0, count, count, pos2 - pos1, pos1, 0)
512 self.fp.write(endrec)
Guido van Rossumf85af612001-04-14 16:45:14 +0000513 self.fp.flush()
Fred Drake3d9091e2001-03-26 15:49:24 +0000514 if not self._filePassed:
515 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000516 self.fp = None
517
518
519class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +0000520 """Class to create ZIP archives with Python library files and packages."""
521
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000522 def writepy(self, pathname, basename = ""):
523 """Add all files from "pathname" to the ZIP archive.
524
Fred Drake484d7352000-10-02 21:14:52 +0000525 If pathname is a package directory, search the directory and
526 all package subdirectories recursively for all *.py and enter
527 the modules into the archive. If pathname is a plain
528 directory, listdir *.py and enter all modules. Else, pathname
529 must be a Python *.py file and the module will be put into the
530 archive. Added modules are always module.pyo or module.pyc.
531 This method will compile the module.py into module.pyc if
532 necessary.
533 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000534 dir, name = os.path.split(pathname)
535 if os.path.isdir(pathname):
536 initname = os.path.join(pathname, "__init__.py")
537 if os.path.isfile(initname):
538 # This is a package directory, add it
539 if basename:
540 basename = "%s/%s" % (basename, name)
541 else:
542 basename = name
543 if self.debug:
544 print "Adding package in", pathname, "as", basename
545 fname, arcname = self._get_codename(initname[0:-3], basename)
546 if self.debug:
547 print "Adding", arcname
548 self.write(fname, arcname)
549 dirlist = os.listdir(pathname)
550 dirlist.remove("__init__.py")
551 # Add all *.py files and package subdirectories
552 for filename in dirlist:
553 path = os.path.join(pathname, filename)
554 root, ext = os.path.splitext(filename)
555 if os.path.isdir(path):
556 if os.path.isfile(os.path.join(path, "__init__.py")):
557 # This is a package directory, add it
558 self.writepy(path, basename) # Recursive call
559 elif ext == ".py":
560 fname, arcname = self._get_codename(path[0:-3],
561 basename)
562 if self.debug:
563 print "Adding", arcname
564 self.write(fname, arcname)
565 else:
566 # This is NOT a package directory, add its files at top level
567 if self.debug:
568 print "Adding files from directory", pathname
569 for filename in os.listdir(pathname):
570 path = os.path.join(pathname, filename)
571 root, ext = os.path.splitext(filename)
572 if ext == ".py":
573 fname, arcname = self._get_codename(path[0:-3],
574 basename)
575 if self.debug:
576 print "Adding", arcname
577 self.write(fname, arcname)
578 else:
579 if pathname[-3:] != ".py":
580 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000581 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000582 fname, arcname = self._get_codename(pathname[0:-3], basename)
583 if self.debug:
584 print "Adding file", arcname
585 self.write(fname, arcname)
586
587 def _get_codename(self, pathname, basename):
588 """Return (filename, archivename) for the path.
589
Fred Drake484d7352000-10-02 21:14:52 +0000590 Given a module name path, return the correct file path and
591 archive name, compiling if necessary. For example, given
592 /python/lib/string, return (/python/lib/string.pyc, string).
593 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000594 file_py = pathname + ".py"
595 file_pyc = pathname + ".pyc"
596 file_pyo = pathname + ".pyo"
597 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000598 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +0000599 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000600 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000601 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +0000602 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000603 if self.debug:
604 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +0000605 try:
606 py_compile.compile(file_py, file_pyc, None, True)
607 except py_compile.PyCompileError,err:
608 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000609 fname = file_pyc
610 else:
611 fname = file_pyc
612 archivename = os.path.split(fname)[1]
613 if basename:
614 archivename = "%s/%s" % (basename, archivename)
615 return (fname, archivename)