blob: 4f2b9468a941ffecc3d520e0bb3cd4e970592f75 [file] [log] [blame]
Fred Drake484d7352000-10-02 21:14:52 +00001"Read and write ZIP files."
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002# Written by James C. Ahlstrom jim@interet.com
3# All rights transferred to CNRI pursuant to the Python contribution agreement
4
5import struct, os, time
Fred Drake484d7352000-10-02 21:14:52 +00006import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00007
8try:
Tim Peterse1190062001-01-15 03:34:38 +00009 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
12
Skip Montanaro40fc1602001-03-01 04:27:19 +000013__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
14 "ZipInfo", "ZipFile", "PyZipFile"]
15
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or its records are invalid."""


error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# Each "struct*" format is paired with the "string*" signature that must
# appear in the first four bytes of the corresponding record.
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4H3l5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4H3l2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
# (positions in the tuple produced by unpacking structCentralDir)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4          # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple produced by unpacking structFileHeader)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2          # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
67
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record is found, False otherwise.  A file
    that cannot be opened or read (IOError) is reported as False rather
    than raising.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when reading the record fails;
            # previously an IOError here skipped the close().
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000079
Martin v. Löwis6f6873b2002-10-13 13:54:50 +000080def _EndRecData(fpin):
81 """Return data from the "End of Central Directory" record, or None.
82
83 The data is a list of the nine items in the ZIP "End of central dir"
84 record followed by a tenth item, the file seek offset of this record."""
85 fpin.seek(-22, 2) # Assume no archive comment.
86 filesize = fpin.tell() + 22 # Get file size
87 data = fpin.read()
88 if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
89 endrec = struct.unpack(structEndArchive, data)
90 endrec = list(endrec)
91 endrec.append("") # Append the archive comment
92 endrec.append(filesize - 22) # Append the record start offset
93 return endrec
94 # Search the last END_BLOCK bytes of the file for the record signature.
95 # The comment is appended to the ZIP file and has a 16 bit length.
96 # So the comment may be up to 64K long. We limit the search for the
97 # signature to a few Kbytes at the end of the file for efficiency.
98 # also, the signature must not appear in the comment.
99 END_BLOCK = min(filesize, 1024 * 4)
100 fpin.seek(filesize - END_BLOCK, 0)
101 data = fpin.read()
102 start = data.rfind(stringEndArchive)
103 if start >= 0: # Correct signature string was found
104 endrec = struct.unpack(structEndArchive, data[start:start+22])
105 endrec = list(endrec)
106 comment = data[start+22:]
107 if endrec[7] == len(comment): # Comment length checks out
108 # Append the archive comment and start offset
109 endrec.append(comment)
110 endrec.append(filesize - END_BLOCK + start)
111 return endrec
112 return # Error, return None
113
Fred Drake484d7352000-10-02 21:14:52 +0000114
class ZipInfo:
    """Metadata record describing one member of a ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a record for 'filename' stamped with 'date_time'.

        filename:  name of the member; the platform path separator is
                   converted to "/" by _normpath().
        date_time: sequence of (year, month, day, hour, min, sec).

        The attributes header_offset, file_offset, CRC, compress_size
        and file_size are not set here; class ZipFile fills them in.
        """
        # Name of the file in the archive
        self.filename = _normpath(filename)
        # year, month, day, hour, min, sec
        self.date_time = date_time

        # Standard values for everything else.
        self.compress_type = ZIP_STORED     # Type of compression for the file
        self.comment = ""                   # Comment for each file
        self.extra = ""                     # ZIP extra data
        self.create_system = 0              # System which created ZIP archive
        self.create_version = 20            # Version which created ZIP archive
        self.extract_version = 20           # Version needed to extract archive
        self.reserved = 0                   # Must be zero
        self.flag_bits = 0                  # ZIP flag bits
        self.volume = 0                     # Volume number of file header
        self.internal_attr = 0              # Internal attributes
        self.external_attr = 0              # External file attributes

    def FileHeader(self):
        """Return the per-file (local) header as a string.

        When bit 0x08 of flag_bits is set, the CRC and both sizes are
        written as zero because they follow the file data instead.
        """
        stamp = self.date_time
        # Pack the timestamp into the 16-bit DOS date and time words.
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            crc = packed_size = plain_size = 0
        else:
            crc = self.CRC
            packed_size = self.compress_size
            plain_size = self.file_size
        fields = (stringFileHeader,
                  self.extract_version, self.reserved, self.flag_bits,
                  self.compress_type, dostime, dosdate, crc,
                  packed_size, plain_size,
                  len(self.filename), len(self.extra))
        fixed_part = struct.pack(structFileHeader, *fields)
        return fixed_part + self.filename + self.extra
158
159
Fred Drakea58947f2001-07-19 19:44:25 +0000160# This is used to ensure paths in generated ZIP files always use
161# forward slashes as the directory separator, as required by the
162# ZIP format specification.
163if os.sep != "/":
164 def _normpath(path):
165 return path.replace(os.sep, "/")
166else:
167 def _normpath(path):
168 return path
169
170
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000171class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000172 """ Class with methods to open, read, write, close, list zip files.
173
Fred Drake3d9091e2001-03-26 15:49:24 +0000174 z = ZipFile(file, mode="r", compression=ZIP_STORED)
Tim Petersa19a1682001-03-29 04:36:09 +0000175
Fred Drake3d9091e2001-03-26 15:49:24 +0000176 file: Either the path to the file, or a file-like object.
177 If it is a path, the file will be opened and closed by ZipFile.
178 mode: The mode can be either read "r", write "w" or append "a".
179 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
180 """
Fred Drake484d7352000-10-02 21:14:52 +0000181
Fred Drake90eac282001-02-28 05:29:34 +0000182 fp = None # Set here since __del__ checks it
183
Fred Drake3d9091e2001-03-26 15:49:24 +0000184 def __init__(self, file, mode="r", compression=ZIP_STORED):
Fred Drake484d7352000-10-02 21:14:52 +0000185 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000186 if compression == ZIP_STORED:
187 pass
188 elif compression == ZIP_DEFLATED:
189 if not zlib:
190 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000191 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000192 else:
193 raise RuntimeError, "That compression method is not supported"
Tim Peterse1190062001-01-15 03:34:38 +0000194 self.debug = 0 # Level of printing: 0 through 3
195 self.NameToInfo = {} # Find file info given name
196 self.filelist = [] # List of ZipInfo instances for archive
197 self.compression = compression # Method of compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000198 self.mode = key = mode[0]
Tim Petersa19a1682001-03-29 04:36:09 +0000199
Fred Drake3d9091e2001-03-26 15:49:24 +0000200 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000201 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000202 self._filePassed = 0
203 self.filename = file
204 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
205 self.fp = open(file, modeDict[mode])
206 else:
207 self._filePassed = 1
208 self.fp = file
209 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000210
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000211 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000212 self._GetContents()
213 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000214 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000215 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000216 try: # See if file is a zip file
217 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000218 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000219 self.fp.seek(self.start_dir, 0)
220 except BadZipfile: # file is not a zip file, just append
221 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000222 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000223 if not self._filePassed:
224 self.fp.close()
225 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000226 raise RuntimeError, 'Mode must be "r", "w" or "a"'
227
228 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000229 """Read the directory, making sure we close the file if the format
230 is bad."""
231 try:
232 self._RealGetContents()
233 except BadZipfile:
234 if not self._filePassed:
235 self.fp.close()
236 self.fp = None
237 raise
238
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the "End of Central Directory" record, walks the central
        directory creating one ZipInfo per entry (appended to
        self.filelist and indexed by name in self.NameToInfo), then
        visits each local file header to compute where the file data
        starts.  Also sets self.comment and self.start_dir.

        Raises BadZipfile when the end record is missing or a record
        signature is wrong, and RuntimeError when the name in a local
        header differs from the name in the central directory.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile, "File is not a zip file"
        if self.debug > 1:
            print endrec
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        # so x is where the central directory actually starts in this file
        x = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = x - offset_cd
        if self.debug > 2:
            print "given, inferred, offset", offset_cd, x, concat
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # total counts the central directory bytes consumed so far
        total = 0
        while total < size_cd:
            # Fixed-size part of one central directory entry
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile, "Bad magic number for central directory"
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print centdir
            # The variable-length name, extra field and comment follow
            # the fixed part, in that order.
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            # (x is reused here; its earlier value is no longer needed)
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            # Stored offsets are shifted by concat to get real positions
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            # Items 1..11 of the entry; t and d are the packed time and date
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
            if self.debug > 2:
                print "total", total
        # Second pass: read each local file header
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile, "Bad magic number for file header"
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            # The two copies of the name must agree
            if fname != data.filename:
                raise RuntimeError, \
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000307
308 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000309 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000310 l = []
311 for data in self.filelist:
312 l.append(data.filename)
313 return l
314
315 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000316 """Return a list of class ZipInfo instances for files in the
317 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000318 return self.filelist
319
320 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000321 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000322 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
323 for zinfo in self.filelist:
324 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
325 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
326
327 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000328 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000329 for zinfo in self.filelist:
330 try:
Tim Peterse1190062001-01-15 03:34:38 +0000331 self.read(zinfo.filename) # Check CRC-32
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000332 except:
333 return zinfo.filename
334
335 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000336 """Return the instance of ZipInfo given 'name'."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000337 return self.NameToInfo[name]
338
339 def read(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000340 """Return file bytes (as a string) for name."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000341 if self.mode not in ("r", "a"):
342 raise RuntimeError, 'read() requires mode "r" or "a"'
343 if not self.fp:
344 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000345 "Attempt to read ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000346 zinfo = self.getinfo(name)
347 filepos = self.fp.tell()
348 self.fp.seek(zinfo.file_offset, 0)
349 bytes = self.fp.read(zinfo.compress_size)
350 self.fp.seek(filepos, 0)
351 if zinfo.compress_type == ZIP_STORED:
352 pass
353 elif zinfo.compress_type == ZIP_DEFLATED:
354 if not zlib:
355 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000356 "De-compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000357 # zlib compress/decompress code by Jeremy Hylton of CNRI
358 dc = zlib.decompressobj(-15)
359 bytes = dc.decompress(bytes)
360 # need to feed in unused pad byte so that zlib won't choke
361 ex = dc.decompress('Z') + dc.flush()
362 if ex:
363 bytes = bytes + ex
364 else:
365 raise BadZipfile, \
Fred Drake5db246d2000-09-29 20:44:48 +0000366 "Unsupported compression method %d for file %s" % \
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000367 (zinfo.compress_type, name)
368 crc = binascii.crc32(bytes)
369 if crc != zinfo.CRC:
370 raise BadZipfile, "Bad CRC-32 for file %s" % name
371 return bytes
372
373 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000374 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000375 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000376 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000377 print "Duplicate name:", zinfo.filename
378 if self.mode not in ("w", "a"):
379 raise RuntimeError, 'write() requires mode "w" or "a"'
380 if not self.fp:
381 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000382 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000383 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
384 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000385 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000386 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
387 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000388 "That compression method is not supported"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000389
390 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000391 """Put the bytes from filename into the archive under the name
392 arcname."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000393 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000394 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000395 date_time = mtime[0:6]
396 # Create ZipInfo instance to store file information
397 if arcname is None:
Tim Peterse1190062001-01-15 03:34:38 +0000398 zinfo = ZipInfo(filename, date_time)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000399 else:
Tim Peterse1190062001-01-15 03:34:38 +0000400 zinfo = ZipInfo(arcname, date_time)
Guido van Rossum1ae4c3d2002-08-12 15:15:51 +0000401 zinfo.external_attr = st[0] << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000402 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000403 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404 else:
Tim Peterse1190062001-01-15 03:34:38 +0000405 zinfo.compress_type = compress_type
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000406 self._writecheck(zinfo)
407 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000408 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000409 zinfo.header_offset = self.fp.tell() # Start of header bytes
Finn Bock03a3bb82001-09-05 18:40:33 +0000410 # Must overwrite CRC and sizes with correct data later
411 zinfo.CRC = CRC = 0
412 zinfo.compress_size = compress_size = 0
413 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000414 self.fp.write(zinfo.FileHeader())
Tim Peterse1190062001-01-15 03:34:38 +0000415 zinfo.file_offset = self.fp.tell() # Start of file bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000416 if zinfo.compress_type == ZIP_DEFLATED:
417 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
418 zlib.DEFLATED, -15)
419 else:
420 cmpr = None
421 while 1:
422 buf = fp.read(1024 * 8)
423 if not buf:
424 break
425 file_size = file_size + len(buf)
426 CRC = binascii.crc32(buf, CRC)
427 if cmpr:
428 buf = cmpr.compress(buf)
429 compress_size = compress_size + len(buf)
430 self.fp.write(buf)
431 fp.close()
432 if cmpr:
433 buf = cmpr.flush()
434 compress_size = compress_size + len(buf)
435 self.fp.write(buf)
436 zinfo.compress_size = compress_size
437 else:
438 zinfo.compress_size = file_size
439 zinfo.CRC = CRC
440 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000441 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000442 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000443 self.fp.seek(zinfo.header_offset + 14, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000444 self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
445 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000446 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000447 self.filelist.append(zinfo)
448 self.NameToInfo[zinfo.filename] = zinfo
449
450 def writestr(self, zinfo, bytes):
Fred Drake484d7352000-10-02 21:14:52 +0000451 """Write a file into the archive. The contents is the string
452 'bytes'."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000453 self._writecheck(zinfo)
Tim Peterse1190062001-01-15 03:34:38 +0000454 zinfo.file_size = len(bytes) # Uncompressed size
455 zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000456 if zinfo.compress_type == ZIP_DEFLATED:
457 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
458 zlib.DEFLATED, -15)
459 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +0000460 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000461 else:
462 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +0000463 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000464 self.fp.write(zinfo.FileHeader())
Tim Peterse1190062001-01-15 03:34:38 +0000465 zinfo.file_offset = self.fp.tell() # Start of file bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000466 self.fp.write(bytes)
467 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000468 # Write CRC and file sizes after the file data
469 self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
470 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000471 self.filelist.append(zinfo)
472 self.NameToInfo[zinfo.filename] = zinfo
473
474 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +0000475 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000476 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000477
478 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +0000479 """Close the file, and for mode "w" and "a" write the ending
480 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000481 if self.fp is None:
482 return
Tim Peterse1190062001-01-15 03:34:38 +0000483 if self.mode in ("w", "a"): # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000484 count = 0
485 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +0000486 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000487 count = count + 1
488 dt = zinfo.date_time
489 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000490 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000491 centdir = struct.pack(structCentralDir,
492 stringCentralDir, zinfo.create_version,
493 zinfo.create_system, zinfo.extract_version, zinfo.reserved,
494 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
495 zinfo.CRC, zinfo.compress_size, zinfo.file_size,
496 len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
497 0, zinfo.internal_attr, zinfo.external_attr,
498 zinfo.header_offset)
499 self.fp.write(centdir)
500 self.fp.write(zinfo.filename)
501 self.fp.write(zinfo.extra)
502 self.fp.write(zinfo.comment)
503 pos2 = self.fp.tell()
504 # Write end-of-zip-archive record
505 endrec = struct.pack(structEndArchive, stringEndArchive,
506 0, 0, count, count, pos2 - pos1, pos1, 0)
507 self.fp.write(endrec)
Guido van Rossumf85af612001-04-14 16:45:14 +0000508 self.fp.flush()
Fred Drake3d9091e2001-03-26 15:49:24 +0000509 if not self._filePassed:
510 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000511 self.fp = None
512
513
514class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +0000515 """Class to create ZIP archives with Python library files and packages."""
516
    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename is the archive directory prefix under which the
        modules are stored; it is extended with the package name on
        each level of recursion.  Raises RuntimeError when pathname is
        a file whose name does not end with ".py".
        """
        dir, name = os.path.split(pathname)
        if os.path.isdir(pathname):
            # A directory counts as a package when it has an __init__.py
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print "Adding package in", pathname, "as", basename
                # The package's __init__ module is written first
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print "Adding", arcname
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been written; skip it in the loop
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        # Subdirectories without __init__.py are ignored
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                         basename)
                        if self.debug:
                            print "Adding", arcname
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                # (subdirectories are not visited in this case)
                if self.debug:
                    print "Adding files from directory", pathname
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                         basename)
                        if self.debug:
                            print "Adding", arcname
                        self.write(fname, arcname)
        else:
            # pathname is expected to be a single module file
            if pathname[-3:] != ".py":
                raise RuntimeError, \
                      'Files added with writepy() must end with ".py"'
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print "Adding file", arcname
            self.write(fname, arcname)
581
582 def _get_codename(self, pathname, basename):
583 """Return (filename, archivename) for the path.
584
Fred Drake484d7352000-10-02 21:14:52 +0000585 Given a module name path, return the correct file path and
586 archive name, compiling if necessary. For example, given
587 /python/lib/string, return (/python/lib/string.pyc, string).
588 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000589 file_py = pathname + ".py"
590 file_pyc = pathname + ".pyc"
591 file_pyo = pathname + ".pyo"
592 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000593 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +0000594 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000595 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000596 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +0000597 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000598 if self.debug:
599 print "Compiling", file_py
600 py_compile.compile(file_py, file_pyc)
601 fname = file_pyc
602 else:
603 fname = file_pyc
604 archivename = os.path.split(fname)[1]
605 if basename:
606 archivename = "%s/%s" % (basename, archivename)
607 return (fname, archivename)