blob: 2df16ba7ee8a966365492a9ad642a3ddd05ee849 [file] [log] [blame]
"Read and write ZIP files."
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement
4
import binascii
import os
import struct
import time

# zlib is optional: it is only required for ZIP_DEFLATED members, so a
# missing module is recorded as None and checked at the point of use.
try:
    import zlib
except ImportError:
    zlib = None
12
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
]
15
class BadZipfile(Exception):
    """Raised by this module when archive data cannot be used."""


error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# Compression methods handled by this module.  Every other ZIP
# compression method is unsupported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats and 4-byte signatures of the three record types.
# End-of-archive record: 8 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"
# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4H3l5HLl"
stringCentralDir = "PK\001\002"
# Local file header: 12 items, 30 bytes.
structFileHeader = "<4s2B4H3l2H"
stringFileHeader = "PK\003\004"

# Positions of the fields in an unpacked central directory entry,
# numbered 0..18 in the order they appear in structCentralDir.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,            # is this meaningful?
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# Positions of the fields in an unpacked local file header,
# numbered 0..11 in the order they appear in structFileHeader.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,            # is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
67
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  The last 22 bytes are
    read and compared against the end-of-archive signature followed by a
    zero comment length.

    Return True when they match.  Return False when they do not, or when
    opening, seeking or reading the file raises IOError.

    Will not accept a ZIP archive with an ending comment.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Release the handle even when seek()/read() fails; otherwise
            # the IOError handler below would return with it still open.
            fpin.close()
    except IOError:
        return False
    # Signature first, then the trailing 2-byte comment length of zero.
    return endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000"
Guido van Rossum32abe6f2000-03-31 17:30:02 +000083
Fred Drake484d7352000-10-02 21:14:52 +000084
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the record for one archive member.

        filename is stored with forward slashes as separators;
        date_time is a (year, month, day, hour, min, sec) sequence.
        """
        self.filename = _normpath(filename) # Name of the file in the archive
        self.date_time = date_time          # year, month, day, hour, min, sec

        # Standard values:
        self.compress_type = ZIP_STORED     # Type of compression for the file
        self.comment = ""                   # Comment for each file
        self.extra = ""                     # ZIP extra data
        self.flag_bits = 0                  # ZIP flag bits
        self.reserved = 0                   # Must be zero
        self.volume = 0                     # Volume number of file header
        self.create_system = 0              # System which created ZIP archive
        self.create_version = 20            # Version which created ZIP archive
        self.extract_version = 20           # Version needed to extract archive
        self.internal_attr = 0              # Internal attributes
        self.external_attr = 0              # External file attributes

        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by
        the file name and the extra data.
        """
        stamp = self.date_time
        # Pack the date and time into the two 16-bit DOS fields.
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            checksum = packed_size = plain_size = 0
        else:
            checksum = self.CRC
            packed_size = self.compress_size
            plain_size = self.file_size
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, dostime, dosdate,
            checksum, packed_size, plain_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
128
129
# The ZIP format specification requires forward slashes as the directory
# separator, so paths stored in generated ZIP files are normalised here.
def _normpath(path):
    """Return path with every os.sep replaced by a forward slash."""
    if os.sep == "/":
        # Already in archive form on this platform.
        return path
    return path.replace(os.sep, "/")
139
140
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
          Only the first character of the mode string is significant.
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED when zlib is missing, and for any mode that does not
        start with "r", "w" or "a".  In read mode (and in append mode when
        the file already is an archive) the table of contents is read,
        which may raise BadZipfile.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        # Validate the mode before touching the file, so that a bad mode
        # raises the documented RuntimeError for paths and file objects
        # alike and never leaves a freshly opened file behind.
        key = mode[:1]
        if key not in ("r", "w", "a"):
            raise RuntimeError('Mode must be "r", "w" or "a"')

        self.debug = 0          # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
                endrec = fp.read()
            except IOError:
                # The seek fails on a file shorter than an end record;
                # such a file cannot be an archive, so just append to it.
                endrec = ""
            if endrec[0:4] == stringEndArchive and \
               endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # Nothing to read in mode 'w'.

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            # Only close files this object opened itself.
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and sets self.start_dir.
        Raises BadZipfile when a signature does not match and RuntimeError
        when a local header names a different file than the directory.
        """
        fp = self.fp
        fp.seek(-22, 2)             # Start of end-of-archive record
        filesize = fp.tell() + 22   # Get file size
        endrec = fp.read(22)        # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        # Where the directory really starts, judged from the file size.
        inferred = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            zinfo.volume, zinfo.internal_attr, zinfo.external_attr = \
                centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
        if self.debug > 2:
            print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                    'File name in directory "%s" and header "%s" differ.' % (
                        data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first member that cannot be read back,
        or None when every member reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # A request to stop is not evidence of a bad member.
                raise
            except Exception:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member of that name.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is not in mode "r" or "a",
        is already closed, or needs zlib and zlib is missing; KeyError
        when name is not in the archive; BadZipfile for an unsupported
        compression method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        # Put the file position back where it was once the data is read.
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                "Unsupported compression method %d for file %s" %
                (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError when the archive is not writable, is closed,
        or zinfo asks for a compression method that cannot be used.  A
        duplicate name only produces a message, and only when debugging.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the method
        given when the archive was opened.  Raises whatever _writecheck()
        raises, plus any error from reading filename.
        """
        st = os.stat(filename)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        # Unix attributes.  Promote to long before shifting so the result
        # cannot overflow a plain int.
        zinfo.external_attr = long(st[0]) << 16
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Always release the source file, even when reading or
            # compressing it fails part-way through.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is the ZipInfo instance describing the new member; its
        size, CRC and offset attributes are filled in here.
        """
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                  zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() again afterwards does nothing.  A file object
        supplied by the caller is flushed but left open.
        """
        if self.fp is None:
            return
        if self.mode in ("w", "a"):             # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                centdir = struct.pack(structCentralDir,
                  stringCentralDir, zinfo.create_version,
                  zinfo.create_system, zinfo.extract_version, zinfo.reserved,
                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                  zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                  len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
                  0, zinfo.internal_attr, zinfo.external_attr,
                  zinfo.header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(zinfo.extra)
                self.fp.write(zinfo.comment)
            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            endrec = struct.pack(structEndArchive, stringEndArchive,
                     0, 0, count, count, pos2 - pos1, pos1, 0)
            self.fp.write(endrec)
            self.fp.flush()
        if not self._filePassed:
            self.fp.close()
        self.fp = None
487
488
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % arcname)
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already added above
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        When basename is not empty it is prefixed to the archive name
        with a "/" in between.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"
        if os.path.isfile(optimized) and \
           os.stat(optimized).st_mtime >= os.stat(source).st_mtime:
            fname = optimized       # Use .pyo file
        else:
            fname = compiled
            # Compile when the .pyc is missing or older than the source.
            if not os.path.isfile(compiled) or \
               os.stat(compiled).st_mtime < os.stat(source).st_mtime:
                import py_compile
                if self.debug:
                    print("Compiling %s" % source)
                py_compile.compile(source, compiled)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)