blob: 8a113a0fe106d944d56efb1c5fdbc588f6e62fd6 [file] [log] [blame]
"Read and write ZIP files."
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement
4
5import struct, os, time
Fred Drake484d7352000-10-02 21:14:52 +00006import binascii
Guido van Rossum32abe6f2000-03-31 17:30:02 +00007
8try:
Tim Peterse1190062001-01-15 03:34:38 +00009 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000010except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000011 zlib = None
12
# Public names exported by a star-import of this module
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]

class BadZipfile(Exception):
    """Raised when an archive cannot be read.

    This module raises it for a missing or wrong magic number, an
    unsupported compression method, or a CRC-32 mismatch.
    """
    pass
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# Every format starts with "<" (little-endian, standard sizes, no padding),
# so the byte counts quoted below are the exact on-disk record sizes.
structEndArchive = "<4s4H2lH"     # 9 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4H3l2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
# (positions in the tuple returned by struct.unpack(structCentralDir, ...))
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple returned by struct.unpack(structFileHeader, ...))
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# Used to compare file passed to ZipFile: an argument whose type is one of
# these (byte string or unicode string) is treated as a path, anything else
# as an already opened file-like object.
_STRING_TYPES = (type('s'), type(u's'))
70
Fred Drake484d7352000-10-02 21:14:52 +000071
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Only the last 22 bytes of the file are examined: they must start with
    the end-of-archive signature and end with a zero comment length.
    Consequently this will not accept a ZIP archive with an ending comment.

    Returns 1 when the trailer matches and 0 otherwise, including when the
    file cannot be opened or read (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Always release the handle, even when seek()/read() raises
            # (e.g. the file is too short to seek 22 bytes back).
            fpin.close()
    except IOError:
        return 0
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                        # file has correct magic number
    return 0
86
Fred Drake484d7352000-10-02 21:14:52 +000087
class ZipInfo:
    """Record holding the attributes of one member of a ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        # Identity supplied by the caller
        self.filename = filename        # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec

        # Payload-related defaults
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data

        # Header bookkeeping defaults
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes

        # The remaining attributes are filled in by class ZipFile:
        #   header_offset   Byte offset to the file header
        #   file_offset     Byte offset to the start of the file data
        #   CRC             CRC-32 of the uncompressed file
        #   compress_size   Size of the compressed file
        #   file_size       Size of the uncompressed file

    def FileHeader(self):
        """Return the local (per-file) header as a string.

        The result is the packed fixed-size header followed by the file
        name and the extra data.
        """
        stamp = self.date_time
        # Pack the timestamp into the two DOS-style bit fields.
        packed_date = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        packed_time = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] / 2)
        if not (self.flag_bits & 0x08):
            crc = self.CRC
            packed_size = self.compress_size
            plain_size = self.file_size
        else:
            # Bit 0x08 set: these are written after the file data instead,
            # so the header carries zeros.
            crc = packed_size = plain_size = 0
        name_length = len(self.filename)
        extra_length = len(self.extra)
        fixed_part = struct.pack(structFileHeader, stringFileHeader,
                                 self.extract_version, self.reserved,
                                 self.flag_bits, self.compress_type,
                                 packed_time, packed_date, crc,
                                 packed_size, plain_size,
                                 name_length, extra_length)
        return fixed_part + self.filename + self.extra
131
132
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant.

        Raises RuntimeError when the compression method is unsupported,
        when ZIP_DEFLATED is requested but zlib is missing, or when the
        mode is not one of "r", "w", "a".
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression

        # Validate the mode before touching the file, so that a bad mode
        # is reported as RuntimeError for paths as well as for file-like
        # objects (and nothing is opened that would need closing again).
        key = mode[:1]
        if key not in ("r", "w", "a"):
            raise RuntimeError('Mode must be "r", "w" or "a"')
        self.mode = key

        # Check if we were passed a file-like object
        if type(file) in _STRING_TYPES:
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            fp.seek(-22, 2)             # Seek to end-of-file record
            endrec = fp.read()
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # key == 'w': nothing to read, members are simply written out

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            # Only close files this object opened itself.
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo with one ZipInfo per
        central directory entry and sets self.start_dir.  Raises
        BadZipfile on a bad magic number and RuntimeError when a name in
        the central directory differs from the one in the local header.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        # Where the central directory really starts, judged by file size
        inferred_cd = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % (total,))
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that cannot be read, or None
        when every member reads back cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # A user interrupt or interpreter exit is not a bad member.
                raise
            except:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when no member with that name is known.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        The position of the underlying file is restored after reading.
        Raises RuntimeError when the archive is closed, opened in the
        wrong mode, or zlib is needed but missing; raises BadZipfile for
        an unsupported compression method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate member name only produces a warning (when debugging
        is enabled); every other problem raises RuntimeError.
        """
        if self.NameToInfo.has_key(zinfo.filename):
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the archive's
        default compression.  The member is written with flag bit 0x08
        set: CRC and sizes follow the file data.
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x08
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            CRC = 0
            compress_size = 0
            file_size = 0
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Release the source file even if reading or compressing fails.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.

        zinfo is a ZipInfo instance describing the member; its size, CRC
        and offset attributes are filled in here.
        """
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Release the file if the user forgot to call close().

        Only a file opened by this object is closed, and no ending
        records are written here; call close() explicitly to finish an
        archive opened for writing.
        """
        if self.fp and not self._filePassed:
            self.fp.close()
            self.fp = None

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        A file-like object passed in by the caller is flushed (in write
        modes) but left open.  Calling close() again is a no-op.
        """
        if self.fp is None:
            # Already closed: nothing left to write or release.
            return
        if self.mode in ("w", "a"):             # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | dt[5] / 2
                centdir = struct.pack(structCentralDir,
                  stringCentralDir, zinfo.create_version,
                  zinfo.create_system, zinfo.extract_version, zinfo.reserved,
                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                  zinfo.CRC, zinfo.compress_size, zinfo.file_size,
                  len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
                  0, zinfo.internal_attr, zinfo.external_attr,
                  zinfo.header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(zinfo.extra)
                self.fp.write(zinfo.comment)
            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            endrec = struct.pack(structEndArchive, stringEndArchive,
                     0, 0, count, count, pos2 - pos1, pos1, 0)
            self.fp.write(endrec)
            self.fp.flush()
        if not self._filePassed:
            self.fp.close()
        self.fp = None
475
476
class PyZipFile(ZipFile):
    """ZipFile variant for archiving Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        Three cases are handled:
          - a package directory (contains __init__.py): it and its package
            subdirectories are searched recursively for *.py modules,
            which are stored under the package name;
          - a plain directory: its *.py modules are stored using the
            given basename;
          - anything else: must be the path of a single *.py file.
        What gets stored is always the compiled module.pyo or module.pyc;
        module.py is compiled to module.pyc when necessary.
        """
        parent, leaf = os.path.split(pathname)

        # Single module given by file name.
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            source, member = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("%s %s" % ("Adding file", member))
            self.write(source, member)
            return

        package_init = os.path.join(pathname, "__init__.py")

        # Plain directory: add its modules using the basename as given.
        if not os.path.isfile(package_init):
            if self.debug:
                print("%s %s" % ("Adding files from directory", pathname))
            for entry in os.listdir(pathname):
                entry_path = os.path.join(pathname, entry)
                if os.path.splitext(entry)[1] == ".py":
                    source, member = self._get_codename(entry_path[0:-3],
                                                        basename)
                    if self.debug:
                        print("%s %s" % ("Adding", member))
                    self.write(source, member)
            return

        # Package directory: extend the archive prefix with its name.
        if basename:
            basename = "%s/%s" % (basename, leaf)
        else:
            basename = leaf
        if self.debug:
            print("%s %s %s %s" % ("Adding package in", pathname,
                                   "as", basename))
        source, member = self._get_codename(package_init[0:-3], basename)
        if self.debug:
            print("%s %s" % ("Adding", member))
        self.write(source, member)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")       # already stored above
        # Add all *.py files and package subdirectories
        for entry in entries:
            entry_path = os.path.join(pathname, entry)
            suffix = os.path.splitext(entry)[1]
            if os.path.isdir(entry_path):
                # Only descend into subdirectories that are packages.
                if os.path.isfile(os.path.join(entry_path, "__init__.py")):
                    self.writepy(entry_path, basename)  # Recursive call
            elif suffix == ".py":
                source, member = self._get_codename(entry_path[0:-3],
                                                    basename)
                if self.debug:
                    print("%s %s" % ("Adding", member))
                self.write(source, member)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        pathname is a module path without extension.  The .pyo file is
        chosen when it exists and is not older than the .py file;
        otherwise the .pyc file is chosen, after compiling it when it is
        missing or older than the .py file.  The archive name is the
        chosen file's name, prefixed with "basename/" when basename is
        non-empty.
        """
        py_path = pathname + ".py"
        pyc_path = pathname + ".pyc"
        pyo_path = pathname + ".pyo"
        if os.path.isfile(pyo_path) and \
           os.stat(pyo_path)[8] >= os.stat(py_path)[8]:
            chosen = pyo_path           # Use .pyo  file
        else:
            if not os.path.isfile(pyc_path) or \
               os.stat(pyc_path)[8] < os.stat(py_path)[8]:
                import py_compile
                if self.debug:
                    print("%s %s" % ("Compiling", py_path))
                py_compile.compile(py_path, pyc_path)
            chosen = pyc_path
        member = os.path.split(chosen)[1]
        if basename:
            member = "%s/%s" % (basename, member)
        return (chosen, member)