blob: c0a0fe556627b41be98269ef5b6d4b06904e8e1a [file] [log] [blame]
Fred Drake484d7352000-10-02 21:14:52 +00001"Read and write ZIP files."
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002# Written by James C. Ahlstrom jim@interet.com
3# All rights transferred to CNRI pursuant to the Python contribution agreement
4
5import struct, os, time
Fred Drake484d7352000-10-02 21:14:52 +00006import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00007
8try:
Tim Peterse1190062001-01-15 03:34:38 +00009 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
12
# Names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]
15
class BadZipfile(Exception):
    """Raised when an archive is malformed: bad magic number, CRC
    mismatch or unsupported compression method."""


# Alias for the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.  All of them
# are little-endian ("<"); the item counts below are the number of values
# struct.pack() takes / struct.unpack() returns for each format.
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4H3l2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
32
Fred Drake3e038e52001-02-28 17:56:26 +000033# indexes of entries in the central directory structure
34_CD_SIGNATURE = 0
35_CD_CREATE_VERSION = 1
36_CD_CREATE_SYSTEM = 2
37_CD_EXTRACT_VERSION = 3
38_CD_EXTRACT_SYSTEM = 4 # is this meaningful?
39_CD_FLAG_BITS = 5
40_CD_COMPRESS_TYPE = 6
41_CD_TIME = 7
42_CD_DATE = 8
43_CD_CRC = 9
44_CD_COMPRESSED_SIZE = 10
45_CD_UNCOMPRESSED_SIZE = 11
46_CD_FILENAME_LENGTH = 12
47_CD_EXTRA_FIELD_LENGTH = 13
48_CD_COMMENT_LENGTH = 14
49_CD_DISK_NUMBER_START = 15
50_CD_INTERNAL_FILE_ATTRIBUTES = 16
51_CD_EXTERNAL_FILE_ATTRIBUTES = 17
52_CD_LOCAL_HEADER_OFFSET = 18
53
54# indexes of entries in the local file header structure
55_FH_SIGNATURE = 0
56_FH_EXTRACT_VERSION = 1
57_FH_EXTRACT_SYSTEM = 2 # is this meaningful?
58_FH_GENERAL_PURPOSE_FLAG_BITS = 3
59_FH_COMPRESSION_METHOD = 4
60_FH_LAST_MOD_TIME = 5
61_FH_LAST_MOD_DATE = 6
62_FH_CRC = 7
63_FH_COMPRESSED_SIZE = 8
64_FH_UNCOMPRESSED_SIZE = 9
65_FH_FILENAME_LENGTH = 10
66_FH_EXTRA_FIELD_LENGTH = 11
67
# Used to compare file passed to ZipFile: an argument whose type is listed
# here is treated as a path to open, anything else as a file-like object.
import types
_STRING_TYPES = (types.StringType,)
# Add the Unicode string type only when the types module provides it.
if hasattr(types, "UnicodeType"):
    _STRING_TYPES = _STRING_TYPES + (types.UnicodeType,)
Fred Drake3d9091e2001-03-26 15:49:24 +000073
Fred Drake484d7352000-10-02 21:14:52 +000074
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Only the last 22 bytes of the file are examined: they must start with
    the end-of-archive magic number and end with a zero comment length.
    Will not accept a ZIP archive with an ending comment.

    filename: path of the file to examine.

    Returns True if the file looks like a ZIP archive, False otherwise.
    An IOError while opening, seeking or reading (missing file, file
    shorter than 22 bytes, ...) is reported as False, not raised.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Release the handle even when seek()/read() fails.
            fpin.close()
    except IOError:
        return False
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return True                     # file has correct magic number
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000090
Fred Drake484d7352000-10-02 21:14:52 +000091
class ZipInfo:
    """Record of the attributes of one file stored in a ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename, stamped with date_time.

        filename:  name of the file in the archive; it is passed through
                   _normpath() before being stored.
        date_time: sequence (year, month, day, hour, min, sec).
        """
        # What the entry is called and when it was last modified
        self.filename = _normpath(filename)
        self.date_time = date_time
        # Per-file payload carried in the headers
        self.comment = ""                   # Comment for each file
        self.extra = ""                     # ZIP extra data
        # Standard values for the remaining header fields
        self.compress_type = ZIP_STORED     # Type of compression for the file
        self.create_system = 0              # System which created ZIP archive
        self.create_version = 20            # Version which created ZIP archive
        self.extract_version = 20           # Version needed to extract archive
        self.reserved = 0                   # Must be zero
        self.flag_bits = 0                  # ZIP flag bits
        self.volume = 0                     # Volume number of file header
        self.internal_attr = 0              # Internal attributes
        self.external_attr = 0              # External file attributes
        # Attributes that class ZipFile fills in later:
        #   header_offset   Byte offset to the file header
        #   file_offset     Byte offset to the start of the file data
        #   CRC             CRC-32 of the uncompressed file
        #   compress_size   Size of the compressed file
        #   file_size       Size of the uncompressed file

    def FileHeader(self):
        """Build and return the local (per-file) header as a string.

        The result is the packed 30-byte record followed by the file name
        and the extra data.
        """
        stamp = self.date_time
        # Pack the date and time into the two 16-bit DOS fields; seconds
        # are stored with a resolution of two seconds.
        dos_date = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dos_time = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)
        if not (self.flag_bits & 0x08):
            crc = self.CRC
            packed_size = self.compress_size
            plain_size = self.file_size
        else:
            # Flag bit 0x08: these three values are written after the
            # file data, so the header carries zeros for them.
            crc = packed_size = plain_size = 0
        fixed_part = struct.pack(
            structFileHeader, stringFileHeader,
            self.extract_version, self.reserved, self.flag_bits,
            self.compress_type, dos_time, dos_date, crc,
            packed_size, plain_size,
            len(self.filename), len(self.extra))
        return fixed_part + self.filename + self.extra
135
136
Fred Drakea58947f2001-07-19 19:44:25 +0000137# This is used to ensure paths in generated ZIP files always use
138# forward slashes as the directory separator, as required by the
139# ZIP format specification.
140if os.sep != "/":
141 def _normpath(path):
142 return path.replace(os.sep, "/")
143else:
144 def _normpath(path):
145 return path
146
147
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant.  In mode "r" the
        table of contents is read at once.  In mode "a" an existing
        archive is extended (new data overwrites the old central
        directory); a file that is not an archive is appended to.

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED without zlib, and for an invalid mode.  BadZipfile
        from reading the table of contents is propagated.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        # Validate the mode before touching the file, so that a bad mode
        # always raises RuntimeError and never leaves a file open.
        key = mode[0]
        if key not in ("r", "w", "a"):
            raise RuntimeError('Mode must be "r", "w" or "a"')
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key

        # Check if we were passed a file-like object
        if isinstance(file, _STRING_TYPES):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
                endrec = fp.read()
            except IOError:
                # Could not position 22 bytes before the end (e.g. the
                # file is shorter than that): no end record to find.
                endrec = ""
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # Mode "w" needs no preparation.

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            # Only close files this object opened itself.
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo with one ZipInfo per
        central directory entry and sets self.start_dir.  Raises
        BadZipfile on a bad magic number, and RuntimeError when the name
        in a local file header differs from the central directory.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile(
                  "File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        # Where the directory really starts, measured from the file end
        inferred_cd = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.'
                      % (data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first file that cannot be read, or None
        when every file reads back cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # A request to stop is not evidence of a bad file.
                raise
            except Exception:
                return zinfo.filename
        return None

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError if no file of that name is in the archive.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        The position of the underlying file is restored afterwards.
        Raises RuntimeError if the archive is closed, was not opened in
        mode "r" or "a", or zlib is needed but missing; BadZipfile for an
        unsupported compression method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name only produces a warning, and only when
        self.debug is set.  Raises RuntimeError if the archive is closed
        or not writable, or if zinfo.compress_type cannot be used.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the archive's
        compression method.  The file is copied in 8 KB chunks; its
        CRC-32 and sizes are patched into the header afterwards.
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x00
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Always release the source file, even if copying failed.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is a ZipInfo instance; its file_size, CRC, compress_size,
        header_offset and file_offset attributes are set here.
        """
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an already closed archive does nothing.  A
        file object supplied by the caller is flushed but left open.
        """
        if self.fp is None:
            return
        if self.mode in ("w", "a"):             # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                centdir = struct.pack(structCentralDir,
                  stringCentralDir, zinfo.create_version,
                  zinfo.create_system, zinfo.extract_version, zinfo.reserved,
                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                  zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                  len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
                  0, zinfo.internal_attr, zinfo.external_attr,
                  zinfo.header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(zinfo.extra)
                self.fp.write(zinfo.comment)
            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            endrec = struct.pack(structEndArchive, stringEndArchive,
                     0, 0, count, count, pos2 - pos1, pos1, 0)
            self.fp.write(endrec)
            self.fp.flush()
        if not self._filePassed:
            self.fp.close()
        self.fp = None
494
495
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename, when given, is the archive directory prefix under
        which the modules are stored.  Raises RuntimeError if pathname
        is a file whose name does not end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._write_module(initname[0:-3], basename)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path[0:-3], basename)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        self._write_module(path[0:-3], basename)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")

    def _write_module(self, modpath, basename, label="Adding"):
        """Add the compiled file for the module at modpath (a path
        without the ".py" suffix) to the archive under basename.

        label is the leading word(s) of the debug message.
        """
        fname, arcname = self._get_codename(modpath, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        not empty.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        # Index 8 of the stat tuple is the modification time.
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
                 os.stat(file_pyc)[8] < os.stat(file_py)[8]:
                # The .pyc file is missing or older than the source.
                import py_compile
                if self.debug:
                    print("Compiling %s" % file_py)
                py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
589 return (fname, archivename)