blob: 037843c86da4c5fb2ba21ced76f11ac3f0c4c7d3 [file] [log] [blame]
Fred Drake484d7352000-10-02 21:14:52 +00001"Read and write ZIP files."
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002
Martin v. Löwis00756902006-02-05 17:09:41 +00003import struct, os, time, sys
Fred Drake484d7352000-10-02 21:14:52 +00004import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00005
6try:
Tim Peterse1190062001-01-15 03:34:38 +00007 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00008except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00009 zlib = None
10
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
]
13
class BadZipfile(Exception):
    """Raised when a file is not a valid ZIP archive or fails a format check."""


error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"       # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"     # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl" # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"     # magic number for central directory
structFileHeader = "<4s2B4HlLL2H"   # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"     # magic number for file header

# indexes of entries in the central directory structure
# (positions in the tuple unpacked with structCentralDir, 0 through 18)
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,                # is this meaningful?
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# indexes of entries in the local file header structure
# (positions in the tuple unpacked with structFileHeader, 0 through 11)
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,                # is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
65
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to test.  Returns True when an
    "End of Central Directory" record can be located in the file and
    False otherwise, including when the file cannot be opened or read
    (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Always release the handle, even if the probe raised.
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000077
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable file object opened for binary reading.

    The result is a list of the eight items unpacked from the ZIP "End of
    central dir" record, followed by the archive comment and then the file
    seek offset of the record (ten entries in total).  None is returned
    when no valid record can be found, including when the file is too
    short to hold one.
    """
    record_size = 22                # size of the fixed part of the record
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < record_size:
        # Too short to contain the record; seeking to -22 would fail.
        return None

    # Fast path: assume no archive comment, so the record is the file tail.
    fpin.seek(filesize - record_size, 0)
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                       # Append the archive comment
        endrec.append(filesize - record_size)   # Append the record offset
        return endrec

    # Slow path: the record is followed by a comment.  The comment has a
    # 16 bit length, so the record can start at most 65535 + 22 bytes
    # before the end of the file.  The signature must not appear in the
    # comment, since the last occurrence is the one that is used.
    window = min(filesize, record_size + (1 << 16) - 1)
    fpin.seek(filesize - window, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:                  # Correct signature string was found
        record = data[start:start + record_size]
        if len(record) == record_size:
            endrec = list(struct.unpack(structEndArchive, record))
            comment = data[start + record_size:]
            if endrec[7] == len(comment):       # Comment length checks out
                # Append the archive comment and start offset
                endrec.append(comment)
                endrec.append(filesize - window + start)
                return endrec
    return None                     # Error, return None
111
Fred Drake484d7352000-10-02 21:14:52 +0000112
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Set up the description of one archive member.

        filename is the member name; date_time is a sequence of
        (year, month, day, hour, min, sec).
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in
        # file names are used as tricks by viruses in archives.
        filename = filename.split(chr(0), 1)[0]

        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/":
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        # System which created ZIP archive
        if sys.platform != 'win32':
            # Assume everything else is unix-y
            self.create_system = 3
        else:
            self.create_system = 0
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed fixed-size local header followed by the
        file name and the extra field.
        """
        stamp = self.date_time
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            crc, packed_size, raw_size = 0, 0, 0
        else:
            crc = self.CRC
            packed_size = self.compress_size
            raw_size = self.file_size
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, dostime, dosdate, crc,
            packed_size, raw_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
171
172
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        A "b" in mode is ignored.  Raises RuntimeError for an unsupported
        compression method, for ZIP_DEFLATED without zlib, or for a mode
        other than "r", "w" or "a".
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]

        # Reject a bad mode before any file is opened, so nothing needs
        # to be cleaned up and the caller gets the documented error.
        if key not in ('r', 'w', 'a'):
            raise RuntimeError('Mode must be "r", "w" or "a"')

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            # Index with the normalized key so that e.g. "rb" works too.
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
        # key == 'w': nothing to read, writing starts at the current position

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and sets self.comment and
        self.start_dir.  Raises BadZipfile when the end record, the
        central directory or a local file header is missing or malformed,
        and RuntimeError when the name in a local header differs from the
        name in the central directory.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        x = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = x - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s" % (offset_cd, x, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != 46:
                # A short read would otherwise surface as struct.error.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
            if self.debug > 2:
                print("total %s" % (total,))
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            if len(fheader) != 30:
                # A short read would otherwise surface as struct.error.
                raise BadZipfile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.orig_filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.orig_filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        l = []
        for data in self.filelist:
            l.append(data.filename)
        return l

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that raises BadZipfile when
        read, or None when every member reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member called 'name'.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is not open in mode "r" or
        "a", is already closed, or zlib is needed but missing; raises
        BadZipfile for an unsupported compression method or a CRC
        mismatch.  The file position of the archive is restored.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError when the archive is not writable, is closed,
        or zinfo asks for a compression method that cannot be used.  A
        duplicate name only produces a message when self.debug is set.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type defaults to the
        compression chosen for the archive.  The header is written first
        with zero CRC and sizes and patched once the data is written, so
        the archive file must be seekable.
        """
        st = os.stat(filename)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Release the source file even if reading or writing failed.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Keep only (year, month, day, hour, min, sec): the full
            # localtime() result has nine fields, which does not fit the
            # six-field format used by printdir().
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
        else:
            zinfo = zinfo_or_arcname
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() again is a no-op.  A file object passed in by the
        caller is flushed but left open; a file opened by ZipFile is
        closed even when writing the ending records fails.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):             # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    centdir = struct.pack(structCentralDir,
                      stringCentralDir, zinfo.create_version,
                      zinfo.create_system, zinfo.extract_version, zinfo.reserved,
                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                      zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                      len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
                      0, zinfo.internal_attr, zinfo.external_attr,
                      zinfo.header_offset)
                    self.fp.write(centdir)
                    self.fp.write(zinfo.filename)
                    self.fp.write(zinfo.extra)
                    self.fp.write(zinfo.comment)
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
        finally:
            # Drop the reference first so a failing close() is not retried
            # from __del__.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
521
522
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        """
        name = os.path.split(pathname)[1]

        # Case 1: a single module file.
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        # Case 2: a plain directory, add its files at top level.
        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
            return

        # Case 3: a package directory, add it under its own name.
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already added above
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
                continue
            if os.path.splitext(filename)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        fname = file_pyc                # default unless a fresh .pyo exists
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo            # Use .pyo file
        elif not (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
            # The .pyc is missing or older than the source: rebuild it.
            import py_compile
            if self.debug:
                print("Compiling %s" % (file_py,))
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError:
                # Report the failure and carry on with the .pyc name.
                err = sys.exc_info()[1]
                print(err.msg)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)