blob: 7659ce4333109f12c271bcb7f0eeb749619daba4 [file] [log] [blame]
"Read and write ZIP files"
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement
# (Viewer blame data: Guido van Rossum, 32abe6f, 2000-03-31 17:30:02 +0000)
4
5import struct, os, time
6import binascii, py_compile
7
# zlib is optional: it is only needed for the ZIP_DEFLATED method.  When it
# cannot be imported, the name is bound to None and the rest of the module
# tests "if not zlib" before compressing or decompressing.
try:
    import zlib     # We may need its compression method
except ImportError:
    # Only a missing module is tolerated; any other failure while importing
    # zlib is a real error and is allowed to propagate.
    zlib = None
12
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or the archive is corrupt.

    The code in this module raises this exception under the name
    "BadZipfile", so the class must be bound to that name; previously only
    "_BadZipfile" existed and every such raise failed with a NameError.
    """
    pass

_BadZipfile = BadZipfile    # Former (private) name, kept for compatibility
error = BadZipfile          # The exception raised by this module
16
# Compression methods understood by this module.  Any other ZIP
# compression method is not supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct module formats and magic numbers for the three ZIP records.
# All formats are little-endian ("<").

# End-of-archive record: 8 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4H3l5H2l"
stringCentralDir = "PK\001\002"

# Per-file (local) header record: 12 items, 30 bytes.
structFileHeader = "<4s2B4H3l2H"
stringFileHeader = "PK\003\004"
29
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Only the last 22 bytes of the file (the size of an end-of-archive
    record) are examined, so this will not accept a ZIP archive with an
    ending comment.

    Returns 1 if the end-of-archive magic number is present and the
    comment-length field is zero.  Returns 0 otherwise, including when
    the file cannot be opened or is shorter than 22 bytes.
    """
    try:
        fpin = open(filename, "rb")
    except (IOError, OSError):
        return 0
    try:
        try:
            fpin.seek(-22, 2)       # Seek to end-of-file record
            endrec = fpin.read()
        except (IOError, OSError):
            # Seeking before the start of a too-short file fails
            return 0
    finally:
        # Always release the file, even when the seek or read failed
        fpin.close()
    if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
        return 1                    # file has correct magic number
    return 0
43
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive.

    The constructor fills in the name, the time stamp and a set of
    standard values.  The following attributes are not created here; they
    are set by class ZipFile:

      header_offset   Byte offset to the file header
      file_offset     Byte offset to the start of the file data
      CRC             CRC-32 of the uncompressed file
      compress_size   Size of the compressed file
      file_size       Size of the uncompressed file
    """

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- name of the file in the archive
        date_time -- sequence (year, month, day, hour, min, sec)
        """
        self.filename = filename
        self.date_time = date_time

        # Standard values
        self.compress_type = ZIP_STORED     # Type of compression for the file
        self.comment = ""                   # Comment for each file
        self.extra = ""                     # ZIP extra data
        self.create_system = 0              # System which created ZIP archive
        self.create_version = 20            # Version which created ZIP archive
        self.extract_version = 20           # Version needed to extract archive
        self.reserved = 0                   # Must be zero
        self.flag_bits = 0                  # ZIP flag bits
        self.volume = 0                     # Volume number of file header
        self.internal_attr = 0              # Internal attributes
        self.external_attr = 0              # External file attributes

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte header record followed by the
        file name and the extra data.  When bit 0x08 of flag_bits is set,
        the CRC and both sizes are written as zero, because they are
        written after the file data instead; otherwise the CRC,
        compress_size and file_size attributes must already be set.
        """
        stamp = self.date_time
        # Pack the date and time into two 16-bit fields; the seconds are
        # halved before being stored.
        packed_date = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        packed_time = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] >> 1)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            crc = 0
            packed_size = 0
            plain_size = 0
        else:
            crc = self.CRC
            packed_size = self.compress_size
            plain_size = self.file_size
        name = self.filename
        extra = self.extra
        record = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved,
                             self.flag_bits, self.compress_type,
                             packed_time, packed_date, crc,
                             packed_size, plain_size,
                             len(name), len(extra))
        return record + name + extra
86
87
class ZipFile:
    """Class with methods to open, read, write, close, list zip files.

    Instance attributes set by the constructor:

      debug        Level of printing: 0 through 3
      NameToInfo   Dictionary used to find file info given name
      filelist     List of ZipInfo instances for archive
      compression  Default method of compression for write()
      filename     The file name given to the constructor
      mode         First character of the mode argument
      fp           The open file object, or None once closed

    Problems with the archive contents are reported by raising the
    module's exception, "error".
    """

    # Class-level default so that close() and __del__() are safe on an
    # instance whose constructor raised before the file was opened.
    fp = None

    def __init__(self, filename, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        In append mode a file which does not end with an end-of-archive
        record (including a file too short to hold one) is not treated as
        a ZIP file; the archive is then simply appended to it.

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED without the zlib module, and for an unknown mode.
        If reading the table of contents fails, the file is closed again
        before the exception is propagated.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0          # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.filename = filename
        self.mode = key = mode[0]
        if key == 'r':
            self.fp = open(filename, "rb")
            try:
                self._GetContents()
            except:
                self._discard_fp()
                raise
        elif key == 'w':
            self.fp = open(filename, "wb")
        elif key == 'a':
            fp = self.fp = open(filename, "r+b")
            try:
                fp.seek(0, 2)
                if fp.tell() >= 22:
                    fp.seek(-22, 2)     # Seek to end-of-file record
                    endrec = fp.read()
                else:
                    # Too short to hold an end-of-archive record
                    endrec = ""
                if endrec[0:4] == stringEndArchive and \
                   endrec[-2:] == "\000\000":
                    self._GetContents()     # file is a zip file
                    # seek to start of directory and overwrite
                    fp.seek(self.start_dir, 0)
                else:       # file is not a zip file, just append
                    fp.seek(0, 2)
            except:
                self._discard_fp()
                raise
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')

    def _discard_fp(self):
        "Close the file without writing any ending records"
        fp = self.fp
        self.fp = None
        if fp:
            fp.close()

    def _GetContents(self):
        """Read in the table of contents for the zip file.

        Fills self.filelist and self.NameToInfo from the central
        directory, sets self.start_dir, and then checks every per-file
        header against its central directory entry.  Raises the module's
        "error" exception for a missing or bad magic number, and
        RuntimeError when the two records disagree about a file name.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise error("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        x = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = x - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s" % (offset_cd, x, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise error("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[12])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[13])
            x.comment = fp.read(centdir[14])
            total = total + centdir[12] + centdir[13] + centdir[14]
            x.header_offset = centdir[18] + concat
            # Provisional value; corrected below from the per-file header
            x.file_offset = x.header_offset + 30 + centdir[12] + centdir[13]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec).
            # The mask must be applied before doubling the seconds: "*"
            # binds tighter than "&".
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise error("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # The extra field of the per-file header may have a different
            # length from the one in the central directory, so the start of
            # the file data is computed from the per-file header.
            data.file_offset = data.header_offset + 30 + \
                               fheader[10] + fheader[11]
            fname = fp.read(fheader[10])
            if fname != data.filename:
                raise RuntimeError(
 'File name in Central Directory "%s" and File Header "%s" differ.' % (
                             data.filename, fname))

    def namelist(self):
        "Return a list of file names in the archive"
        l = []
        for data in self.filelist:
            l.append(data.filename)
        return l

    def infolist(self):
        "Return a list of class ZipInfo instances for files in the archive"
        return self.filelist

    def printdir(self):
        "Print a table of contents for the zip file"
        print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first file which cannot be read back, or
        None when every file reads correctly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except (KeyboardInterrupt, SystemExit):
                # A request to stop is not a damaged archive member
                raise
            except Exception:
                return zinfo.filename
        return None

    def getinfo(self, name):
        'Return the instance of ZipInfo given "name"'
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        The file position of the archive is restored afterwards.  Raises
        RuntimeError if the archive is not open in mode "r" or "a" or if
        zlib is needed but missing, KeyError for an unknown name, and the
        module's "error" exception for an unsupported compression method
        or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        bytes = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            bytes = dc.decompress(bytes)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                bytes = bytes + ex
        else:
            raise error(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(bytes)
        if crc != zinfo.CRC:
            raise error("Bad CRC-32 for file %s" % name)
        return bytes

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        A duplicate name only produces a warning, and only when debug is
        set.  Raises RuntimeError for a wrong mode, a closed archive, or a
        compression method which cannot be used.
        """
        if self.NameToInfo.has_key(zinfo.filename):
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name arcname.

        arcname defaults to filename and compress_type to the archive's
        default compression.  The file is copied in 8 Kbyte pieces; the
        header is written with flag bit 0x08 set, and the CRC and sizes
        are written after the file data.
        """
        st = os.stat(filename)
        mtime = time.localtime(st[8])
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x08
            zinfo.header_offset = self.fp.tell()        # Start of header bytes
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()  # Start of file bytes
            CRC = 0
            compress_size = 0
            file_size = 0
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Do not leak the source file when reading or writing fails
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo, bytes):
        """Write a file into the archive.  The contents is the string "bytes".

        zinfo is a ZipInfo instance giving the name and other attributes;
        its sizes, CRC and offsets are filled in here.
        """
        self._writecheck(zinfo)
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self.fp.write(zinfo.FileHeader())
        zinfo.file_offset = self.fp.tell()      # Start of file bytes
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        'Call the "close()" method in case the user forgot'
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending records.

        Calling close() on an archive which is already closed does
        nothing.  The file is closed even if writing the ending records
        fails.
        """
        if self.fp is None:
            return
        try:
            if self.mode in ("w", "a"):         # write ending records
                self._write_end_records()
        finally:
            fp = self.fp
            self.fp = None
            fp.close()

    def _write_end_records(self):
        "Write the central directory and the end-of-archive record"
        count = 0
        pos1 = self.fp.tell()
        for zinfo in self.filelist:         # write central directory
            count = count + 1
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] >> 1)
            centdir = struct.pack(structCentralDir,
              stringCentralDir, zinfo.create_version,
              zinfo.create_system, zinfo.extract_version, zinfo.reserved,
              zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
              zinfo.CRC, zinfo.compress_size, zinfo.file_size,
              len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
              0, zinfo.internal_attr, zinfo.external_attr,
              zinfo.header_offset)
            self.fp.write(centdir)
            self.fp.write(zinfo.filename)
            self.fp.write(zinfo.extra)
            self.fp.write(zinfo.comment)
        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        endrec = struct.pack(structEndArchive, stringEndArchive,
                 0, 0, count, count, pos2 - pos1, pos1, 0)
        self.fp.write(endrec)
380
381
class PyZipFile(ZipFile):
    "Class to create ZIP archives with Python library files and packages"

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and all
        package subdirectories recursively for all *.py and enter the
        modules into the archive.  If pathname is a plain directory,
        listdir *.py and enter all modules.  Else, pathname must be a
        Python *.py file and the module will be put into the archive.
        Added modules are always module.pyo or module.pyc.  This method
        will compile the module.py into module.pyc if necessary.

        basename is the archive directory under which the modules are
        stored; archive names are built with "/" separators.  Raises
        RuntimeError if pathname is a file whose name does not end
        with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._write_module(initname[0:-3], basename, "Adding")
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path[0:-3], basename, "Adding")
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    # A directory whose name happens to end in ".py" is
                    # not a module and cannot be compiled; skip it, as the
                    # package branch above does.
                    if ext == ".py" and not os.path.isdir(path):
                        self._write_module(path[0:-3], basename, "Adding")
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")

    def _write_module(self, modpath, basename, label):
        "Add the compiled file for module path modpath, reporting it with label"
        fname, arcname = self._get_codename(modpath, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and archive
        name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).

        A .pyo file is used when it exists and is at least as new as the
        .py file.  Otherwise the .pyc file is used, and it is compiled
        first when it is missing or older than the .py file.  When
        basename is not empty, the archive name is "basename/file name".
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        # Element 8 of the stat result is the modification time
        if os.path.isfile(file_pyo) and \
           os.stat(file_pyo)[8] >= os.stat(file_py)[8]:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
               os.stat(file_pyc)[8] < os.stat(file_py)[8]:
                if self.debug:
                    print("Compiling %s" % file_py)
                py_compile.compile(file_py, file_pyc)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
470