blob: 6aed1729996ced1d046ae92e5ba1f6c441d6eb10 [file] [log] [blame]
Fred Drake484d7352000-10-02 21:14:52 +00001"Read and write ZIP files."
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002# Written by James C. Ahlstrom jim@interet.com
3# All rights transferred to CNRI pursuant to the Python contribution agreement
4
5import struct, os, time
Fred Drake484d7352000-10-02 21:14:52 +00006import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00007
8try:
Tim Peterse1190062001-01-15 03:34:38 +00009 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
12
Skip Montanaro40fc1602001-03-01 04:27:19 +000013__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
14 "ZipInfo", "ZipFile", "PyZipFile"]
15
class BadZipfile(Exception):
    """Raised when a ZIP archive cannot be parsed or fails a CRC check."""


# The exception raised by this module, exported under a second name.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# constants for Zip file compression methods
ZIP_STORED = 0                      # member data is stored as-is
ZIP_DEFLATED = 8                    # member data is a raw deflate stream (needs zlib)
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.  The same
# formats are used with struct.pack() when headers are written.  The
# leading "<" selects little-endian byte order without padding.  Each
# "string*" value is the 4-byte signature that opens the matching record.
structEndArchive = "<4s4H2lH"       # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"     # magic number for end of archive record
structCentralDir = "<4s4B4H3l5HLl"  # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"     # magic number for central directory
structFileHeader = "<4s2B4H3l2H"    # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"     # magic number for file header
32
# indexes of entries in the central directory structure, i.e. positions
# in the 19-item tuple produced by struct.unpack(structCentralDir, ...)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4          # is this meaningful?
                                # (ZipFile stores it as ZipInfo.reserved)
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7                    # packed DOS time
_CD_DATE = 8                    # packed DOS date
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure, i.e. positions
# in the 12-item tuple produced by struct.unpack(structFileHeader, ...)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2          # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
67
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    'filename' is the path of the file to probe.  Return True when an
    "End of Central Directory" record can be located in it, and False
    otherwise -- including when the file cannot be opened or read
    (IOError is swallowed on purpose).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Always release the handle, even when the probe itself fails.
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000079
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    'fpin' is a seekable file object opened in binary mode.

    The data is a list of ten items: the eight fields of the ZIP "End of
    central dir" record (index 7 is the comment length), the archive
    comment at index 8, and at index 9 the file seek offset at which the
    record starts.  None is returned when no consistent record is found,
    which includes files too short to hold one.
    """
    record_size = struct.calcsize(structEndArchive)     # 22 bytes
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < record_size:
        # Too short to contain the record; nothing to seek back to.
        return None

    # Fast path: assume no archive comment, so the record is the very
    # last thing in the file.
    fpin.seek(filesize - record_size, 0)
    data = fpin.read()
    if (len(data) == record_size and data[0:4] == stringEndArchive
        and data[-2:] == "\000\000"):
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                       # Append the archive comment
        endrec.append(filesize - record_size)   # Append the record start offset
        return endrec

    # Slow path: the comment is appended to the ZIP file and has a 16 bit
    # length, so the record can start at most 65535 + 22 bytes before the
    # end of the file.  Search that whole window; a smaller window would
    # miss archives with long comments.  The signature must not appear in
    # the comment, since the last occurrence is taken.
    tail_size = min(filesize, 0xFFFF + record_size)
    fpin.seek(filesize - tail_size, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        return None                     # Error, no signature found
    record = data[start:start + record_size]
    if len(record) != record_size:
        # Signature too close to the end of the file to be a full record.
        return None
    endrec = list(struct.unpack(structEndArchive, record))
    comment = data[start + record_size:]
    if endrec[7] != len(comment):       # Comment length does not check out
        return None
    # Append the archive comment and start offset
    endrec.append(comment)
    endrec.append(filesize - tail_size + start)
    return endrec
113
Fred Drake484d7352000-10-02 21:14:52 +0000114
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive.

    The constructor sets the descriptive attributes listed in __init__.
    Class ZipFile adds the remaining ones while reading or writing:

        header_offset     Byte offset to the file header
        file_offset       Byte offset to the start of the file data
        CRC               CRC-32 of the uncompressed file
        compress_size     Size of the compressed file
        file_size         Size of the uncompressed file
    """

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe the member 'filename' last modified at 'date_time'.

        'date_time' is (year, month, day, hour, min, sec).  The name is
        stored with "/" as the directory separator.
        """
        # Name of the file in the archive
        self.filename = _normpath(filename)
        # year, month, day, hour, min, sec
        self.date_time = date_time

        # Standard values:
        # Type of compression for the file
        self.compress_type = ZIP_STORED
        # Comment for each file
        self.comment = ""
        # ZIP extra data
        self.extra = ""
        # System which created ZIP archive
        self.create_system = 0
        # Version which created ZIP archive
        self.create_version = 20
        # Version needed to extract archive
        self.extract_version = 20
        # Must be zero
        self.reserved = 0
        # ZIP flag bits
        self.flag_bits = 0
        # Volume number of file header
        self.volume = 0
        # Internal attributes
        self.internal_attr = 0
        # External file attributes
        self.external_attr = 0

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by
        the file name and the extra data.
        """
        stamp = self.date_time
        # Pack the time stamp into the two 16-bit DOS fields; seconds are
        # stored with two-second resolution.
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            crc, packed_size, plain_size = 0, 0, 0
        else:
            crc, packed_size, plain_size = (
                self.CRC, self.compress_size, self.file_size)
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, dostime, dosdate,
            crc, packed_size, plain_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
158
159
Fred Drakea58947f2001-07-19 19:44:25 +0000160# This is used to ensure paths in generated ZIP files always use
161# forward slashes as the directory separator, as required by the
162# ZIP format specification.
163if os.sep != "/":
164 def _normpath(path):
165 return path.replace(os.sep, "/")
166else:
167 def _normpath(path):
168 return path
169
170
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
          Only the first character is significant.
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED without the zlib module, and for an unknown mode.
        In mode "r" BadZipfile is raised when the directory cannot be
        read.  In mode "a" a file that is not an archive is simply
        appended to.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode[0]
        if key not in ('r', 'w', 'a'):
            # Reject a bad mode before anything is opened, so that a path
            # and a file object fail the same way and nothing is leaked.
            raise RuntimeError('Mode must be "r", "w" or "a"')

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            # Look up by the normalised key: self.mode is derived from the
            # first character only, so "rb" must open just like "r".
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
        # key == 'w': nothing to read, writing starts at the current position

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo, and sets self.comment
        and self.start_dir.  Raises BadZipfile when a record is missing,
        truncated or has a bad signature, and RuntimeError when the name
        in a local header differs from the one in the directory.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        inferred_cd = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != 46:
                # A short read would make struct.unpack fail below.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            if len(fheader) != 30:
                # A short read would make struct.unpack fail below.
                raise BadZipfile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first member that cannot be read back, or
        None when every member reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # An interrupt is not evidence of a corrupt member.
                raise
            except Exception:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member called 'name'.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is closed or was opened in
        mode "w" (or when zlib is needed but missing), KeyError for an
        unknown name, and BadZipfile for an unsupported compression
        method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        # Remember where we were so that a pending append is not disturbed.
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name is only reported (when self.debug is set); the
        other problems raise RuntimeError.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename' and 'compress_type' to the
        method chosen when the archive was opened.  The member's time
        stamp and attributes are taken from os.stat(filename).
        """
        st = os.stat(filename)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        # Unix attributes; shifted as a long so the result is not limited
        # to the range of a plain int.
        zinfo.external_attr = long(st[0]) << 16
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Do not leak the source file when reading or writing fails.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Keep only (year, month, day, hour, min, sec): date_time is
            # formatted with exactly six fields by printdir().
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
        else:
            zinfo = zinfo_or_arcname
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() again is harmless.  A file object passed to the
        constructor is flushed but left open for its owner.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):             # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    centdir = struct.pack(structCentralDir,
                      stringCentralDir, zinfo.create_version,
                      zinfo.create_system, zinfo.extract_version,
                      zinfo.reserved, zinfo.flag_bits, zinfo.compress_type,
                      dostime, dosdate,
                      zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                      len(zinfo.filename), len(zinfo.extra),
                      len(zinfo.comment),
                      0, zinfo.internal_attr, zinfo.external_attr,
                      zinfo.header_offset)
                    self.fp.write(centdir)
                    self.fp.write(zinfo.filename)
                    self.fp.write(zinfo.extra)
                    self.fp.write(zinfo.comment)
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
                self.fp.flush()
        finally:
            # Release the handle even if writing the directory failed, and
            # mark the archive closed so close() is not attempted again.
            if not self._filePassed:
                self.fp.close()
            self.fp = None
519
520
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        Three kinds of 'pathname' are accepted:

        - a package directory (one holding __init__.py): its *.py
          modules and, recursively, its package subdirectories are
          added below "basename/<directory name>";
        - a plain directory: the *.py modules listed in it are added;
        - a single file, which must end in ".py" (RuntimeError
          otherwise).

        What is stored is always the compiled module.pyo or module.pyc;
        module.py is compiled into module.pyc if necessary.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    self._add_module(path, basename, "Adding")
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        self._add_module(initname, basename, "Adding")
        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already added above
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                self._add_module(path, basename, "Adding")

    def _add_module(self, source, basename, label):
        """Store the compiled form of the ".py" file 'source'.

        'label' is the text printed in front of the archive name when
        self.debug is set.
        """
        fname, arcname = self._get_codename(source[0:-3], basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without its ".py" suffix.  The file
        name returned is the ".pyo" file when it exists and is at least
        as new as the source, otherwise the ".pyc" file, which is
        (re)compiled first when missing or older than the source.  The
        archive name is that file's base name, prefixed with
        "basename/" when 'basename' is not empty.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"
        if (os.path.isfile(optimized) and
            os.stat(optimized).st_mtime >= os.stat(source).st_mtime):
            fname = optimized       # Use .pyo file
        else:
            fname = compiled
            if (not os.path.isfile(compiled) or
                os.stat(compiled).st_mtime < os.stat(source).st_mtime):
                import py_compile
                if self.debug:
                    print("Compiling %s" % (source,))
                py_compile.compile(source, compiled)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)