"""
Read and write ZIP files.
"""
4import struct, os, time, sys
5import binascii, cStringIO
6
7try:
8 import zlib # We may need its compression method
9except ImportError:
10 zlib = None
11
# Public names exported by "from zipfile import *".
__all__ = [
    "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
    "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
]

# True when sys.platform reports a "java..." platform; used below to pick
# the compiled-module file suffix.
is_jython = sys.platform.startswith('java')
16
class BadZipfile(Exception):
    """Raised by this module when an archive cannot be read as a ZIP file."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# The exception raised by this module (alias of BadZipfile).
error = BadZipfile
28
# Largest value that fits in a signed 32-bit field; sizes or offsets above
# this need the ZIP64 extensions.
ZIP64_LIMIT = 0x7FFFFFFF

# Compression methods understood by this module.
# Other ZIP compression methods are not supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats and magic numbers for the on-disk records.

# End of archive record: 8 fields, 22 bytes.
stringEndArchive = "PK\005\006"
structEndArchive = "<4s4H2LH"

# Central directory entry: 19 fields, 46 bytes.
stringCentralDir = "PK\001\002"
structCentralDir = "<4s4B4HlLL5HLL"

# Local file header: 12 fields, 30 bytes.
stringFileHeader = "PK\003\004"
structFileHeader = "<4s2B4HlLL2H"

# ZIP64 end-of-archive locator: 4 fields, 20 bytes.
stringEndArchive64Locator = "PK\x06\x07"
structEndArchive64Locator = "<4slql"

# ZIP64 end-of-archive record: 10 fields, 56 bytes.
stringEndArchive64 = "PK\x06\x06"
structEndArchive64 = "<4sqhhllqqqq"
47
48
# Positions of the fields in an unpacked central directory entry
# (19 values, numbered in record order).
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,             # is this meaningful?
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# Positions of the fields in an unpacked local file header
# (12 values, numbered in record order).
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,             # is this meaningful?
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record is found, False otherwise; a file
    that cannot be opened or read (IOError) also yields False.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when reading the record fails.
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
95
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- open archive file object; it is repositioned by this call
    offset -- position of the classic end-of-archive record, expressed
              relative to the end of the file (a negative number)
    endrec -- list built by _EndRecData(); items 1 to 6 are overwritten
              with the ZIP64 values when a ZIP64 record is present

    Returns endrec.  It is returned unchanged when the file is too short
    to hold the ZIP64 records or when their signatures do not match.
    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is not large enough to contain a ZIP64 locator.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        # Locator present but no room for the record it points at.
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1:7] = [disk_num, disk_dir, dircount, dircount2,
                   dirsize, diroffset]
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is an open, seekable archive file object.

    The data is a list holding the eight fields of the ZIP "End of central
    dir" record (indexes 0-7), followed by the archive comment (index 8)
    and the file offset at which the record starts (index 9).  When the
    record carries the ZIP64 marker in its directory-offset field, the
    list is passed through _EndRecData64() before being returned.

    None is returned when no valid record is found, including when the
    file is too short to contain one.
    """
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < 22:
        # Too small to hold even a bare end-of-archive record.
        return None

    fpin.seek(-22, 2)               # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[6] == -1 or endrec[6] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec

    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        return None                 # Signature not found
    record = data[start:start+22]
    if len(record) != 22:
        # Signature sits too close to the end to be a complete record.
        return None
    endrec = list(struct.unpack(structEndArchive, record))
    comment = data[start+22:]
    if endrec[7] != len(comment):   # Comment length does not check out
        return None
    # Append the archive comment and start offset
    endrec.append(comment)
    endrec.append(filesize - END_BLOCK + start)
    if endrec[6] == -1 or endrec[6] == 0xffffffff:
        return _EndRecData64(fpin, - END_BLOCK + start, endrec)
    return endrec
167
168
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe an archive member.

        filename  -- member name; kept verbatim in orig_filename, while
                     the filename attribute holds the normalized form
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is
        appended to the returned header and extract_version /
        create_version on this object are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 values it carries.

        Walks the (type, length, payload) entries in self.extra.  For a
        ZIP64 entry (type 1), each of file_size, compress_size and
        header_offset that currently holds the "see ZIP64" marker
        (-1 or 0xFFFFFFFF) is replaced, in that order, by the next 64-bit
        value from the entry.

        Raises RuntimeError when a ZIP64 entry has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Stop on a trailing fragment too short to hold a type/length pair.
        while len(extra) >= 4:
            # Both fields are unsigned 16-bit values.  Reading the length
            # as signed would let a crafted length of -4 keep the loop on
            # the same bytes forever.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or \
                        self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or \
                        self.header_offset == 0xFFFFFFFF:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
299
300
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED without zlib, and for any mode other than "r", "w"
        or "a" (a "b" in the mode string is ignored).
        """
        self._allowZip64 = allowZip64
        self._didModify = False
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]

        # Reject bad modes before touching the file, so nothing is opened
        # (or left open) for a call that cannot succeed.
        if key not in ('r', 'w', 'a'):
            raise RuntimeError('Mode must be "r", "w" or "a"')

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            # Look up by the normalized key so that e.g. "rb" works too.
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
        # key == 'w': nothing to read, the archive starts empty

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and sets self.comment and
        self.start_dir.  Raises BadZipfile when no end-of-archive record
        is found or a central directory entry has a bad signature.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        if endrec[9] > ZIP64_LIMIT:
            # Step back over the ZIP64 end record (56) and locator (20)
            inferred = endrec[9] - size_cd - 56 - 20
        else:
            inferred = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            zinfo._decodeExtra()
            zinfo.header_offset = zinfo.header_offset + concat
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)

    def namelist(self):
        """Return a list of file names in the archive."""
        return [zinfo.filename for zinfo in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member that fails to read, or None
        when every member reads cleanly."""
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except BadZipfile:
                return zinfo.filename
        return None

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member of that name."""
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is closed or was not opened
        in mode "r" or "a", and BadZipfile when the member's local header,
        compression method or CRC-32 is not acceptable.  The position of
        the underlying file is restored before returning or raising.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        try:
            self.fp.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = self.fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                      'File name in directory "%s" and header "%s" differ.' % (
                          zinfo.orig_filename, fname))

            data = self.fp.read(zinfo.compress_size)
        finally:
            # In append mode the next write continues from this position,
            # so put it back even when the member turned out to be bad.
            self.fp.seek(filepos, 0)

        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, a closed archive or an
        unusable compression method, and LargeZipFile when the member
        needs ZIP64 extensions that were not enabled."""
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename, with any drive letter and leading
        path separators removed.  compress_type defaults to the archive's
        compression method."""
        st = os.stat(filename)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True
        fp = open(filename, "rb")
        try:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Do not leak the source handle when reading or writing fails.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
        else:
            zinfo = zinfo_or_arcname
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(bytes)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an already closed archive does nothing.  The
        underlying file is released even when writing the ending records
        fails; a file object passed in by the caller is never closed."""
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify:
                self._write_end_records()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()

    def _write_end_records(self):
        """Write the central directory and the end-of-archive record(s)
        at the current file position, then flush."""
        pos1 = self.fp.tell()           # start of the central directory
        for zinfo in self.filelist:     # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []                  # 64-bit values for the ZIP64 field
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                # The field is an unsigned "L" in structCentralDir, so the
                # marker has to be written as 32 one bits, not as -1.
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                        '<hh' + 'q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            centdir = struct.pack(structCentralDir,
              stringCentralDir, create_version,
              zinfo.create_system, extract_version, zinfo.reserved,
              zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
              zinfo.CRC, compress_size, file_size,
              len(zinfo.filename), len(extra_data), len(zinfo.comment),
              0, zinfo.internal_attr, zinfo.external_attr,
              header_offset)
            self.fp.write(centdir)
            self.fp.write(zinfo.filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        count = len(self.filelist)
        pos2 = self.fp.tell()           # end of the central directory
        # Write end-of-zip-archive record
        if pos1 > ZIP64_LIMIT:
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)

            # The directory offset is an unsigned "L" here as well; all
            # one bits sends readers to the ZIP64 record.
            endrec = struct.pack(structEndArchive, stringEndArchive,
                        0, 0, count, count, pos2 - pos1, 0xffffffff, 0)
            self.fp.write(endrec)
        else:
            endrec = struct.pack(structEndArchive, stringEndArchive,
                     0, 0, count, count, pos2 - pos1, pos1, 0)
            self.fp.write(endrec)
        self.fp.flush()
727
728
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for entry in os.listdir(pathname):
                if os.path.splitext(entry)[1] == ".py":
                    entry_path = os.path.join(pathname, entry)
                    self._write_module(entry_path[0:-3], basename, "Adding")
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        self._write_module(initname[0:-3], basename, "Adding")
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            entry_path = os.path.join(pathname, entry)
            ext = os.path.splitext(entry)[1]
            if os.path.isdir(entry_path):
                if os.path.isfile(os.path.join(entry_path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(entry_path, basename)  # Recursive call
            elif ext == ".py":
                self._write_module(entry_path[0:-3], basename, "Adding")

    def _write_module(self, stem, basename, verb):
        """Store the compiled form of module path "stem" (no extension)
        in the archive, announcing it with "verb" when debugging."""
        fname, arcname = self._get_codename(stem, basename)
        if self.debug:
            print("%s %s" % (verb, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        source = pathname + ".py"
        if is_jython:
            compiled = pathname + "$py.class"
        else:
            compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        if os.path.isfile(optimized) and \
                os.stat(optimized).st_mtime >= os.stat(source).st_mtime:
            fname = optimized       # Use .pyo file
        else:
            fname = compiled
            if not os.path.isfile(compiled) or \
                    os.stat(compiled).st_mtime < os.stat(source).st_mtime:
                # Compiled file is missing or older than the source
                import py_compile
                if self.debug:
                    print("Compiling %s" % source)
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError:
                    print(sys.exc_info()[1].msg)

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
826
827
def main(args = None):
    """Command-line entry point: list, test, extract or create an archive.

    args is the argument list without the program name; it defaults to
    sys.argv[1:].  Prints the usage text and exits with status 1 when the
    arguments do not match one of the supported forms.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            # testzip() returns the first member that failed to read.
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined to the target as-is, so
            # an archive entry containing ".." or an absolute path can be
            # written outside "out" -- confirm whether untrusted archives
            # are in scope for this tool.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: created above, nothing to write.
                    continue
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (e.g. a missing path) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()